From 9ac45d26974d88c78ceddd62b4e536fed0bc925d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 10:08:11 -0400 Subject: [PATCH 01/97] moving error classes/utilities to common module --- pydough/errors/__init__.py | 17 ++++++++ pydough/errors/error_types.py | 42 +++++++++++++++++++ .../errors.py => errors/error_utils.py} | 10 +---- pydough/exploration/explain.py | 2 +- pydough/exploration/term.py | 2 +- pydough/metadata/__init__.py | 2 - .../collections/collection_metadata.py | 7 ++-- .../collections/simple_table_metadata.py | 4 +- pydough/metadata/graphs/graph_metadata.py | 3 +- pydough/metadata/parse.py | 7 ++-- .../properties/cartesian_product_metadata.py | 4 +- .../properties/general_join_metadata.py | 4 +- .../metadata/properties/property_metadata.py | 6 +-- .../properties/scalar_attribute_metadata.py | 2 +- .../properties/simple_join_metadata.py | 6 +-- .../subcollection_relationship_metadata.py | 2 +- .../properties/table_column_metadata.py | 7 ++-- pydough/pydough_operators/base_operator.py | 3 +- .../pydough_operators/operator_registry.py | 3 +- .../type_inference/type_verifier.py | 7 +--- pydough/qdag/__init__.py | 3 +- pydough/qdag/collections/calculate.py | 2 +- pydough/qdag/collections/collection_access.py | 2 +- pydough/qdag/collections/collection_qdag.py | 2 +- pydough/qdag/collections/global_context.py | 2 +- pydough/qdag/collections/order_by.py | 2 +- pydough/qdag/collections/partition_by.py | 2 +- pydough/qdag/collections/partition_child.py | 2 +- pydough/qdag/collections/where.py | 2 +- pydough/qdag/errors.py | 11 ----- .../expressions/back_reference_expression.py | 2 +- .../expressions/child_reference_expression.py | 2 +- pydough/qdag/expressions/column_property.py | 2 +- pydough/qdag/expressions/reference.py | 2 +- pydough/qdag/expressions/sided_reference.py | 2 +- pydough/qdag/node_builder.py | 4 +- pydough/types/array_type.py | 3 +- pydough/types/errors.py | 12 ------ pydough/types/map_type.py | 3 +- 
pydough/types/parse_types.py | 3 +- pydough/types/struct_type.py | 3 +- pydough/unqualified/__init__.py | 2 - pydough/unqualified/errors.py | 11 ----- pydough/unqualified/qualification.py | 2 +- pydough/unqualified/unqualified_node.py | 5 +-- tests/test_metadata_errors.py | 3 +- 46 files changed, 125 insertions(+), 106 deletions(-) create mode 100644 pydough/errors/__init__.py create mode 100644 pydough/errors/error_types.py rename pydough/{metadata/errors.py => errors/error_utils.py} (97%) delete mode 100644 pydough/qdag/errors.py delete mode 100644 pydough/types/errors.py delete mode 100644 pydough/unqualified/errors.py diff --git a/pydough/errors/__init__.py b/pydough/errors/__init__.py new file mode 100644 index 000000000..1a89f4ceb --- /dev/null +++ b/pydough/errors/__init__.py @@ -0,0 +1,17 @@ +""" +Module for error handling in PyDough. +""" + +__all__ = [ + "PyDoughMetadataException", + "PyDoughQDAGException", + "PyDoughTypeException", + "PyDoughUnqualifiedException", +] + +from .error_types import ( + PyDoughMetadataException, + PyDoughQDAGException, + PyDoughTypeException, + PyDoughUnqualifiedException, +) diff --git a/pydough/errors/error_types.py b/pydough/errors/error_types.py new file mode 100644 index 000000000..5951bb2d0 --- /dev/null +++ b/pydough/errors/error_types.py @@ -0,0 +1,42 @@ +""" +Definitions of various exception classes used within PyDough. +""" + +__all__ = [ + "PyDoughMetadataException", + "PyDoughQDAGException", + "PyDoughTypeException", + "PyDoughUnqualifiedException", +] + + +class PyDoughMetadataException(Exception): + """ + Exception raised when there is an error relating to PyDough metadata, such + as an error while parsing/validating the JSON or an ill-formed pattern. 
+ """ + + +class PyDoughUnqualifiedException(Exception): + """ + Exception raised when there is an error relating to the PyDough + unqualified form, such as a Python object that cannot be coerced or an + invalid use of a method that can be caught even without qualification. + """ + + +class PyDoughQDAGException(Exception): + """ + Exception raised when there is an error relating to a PyDough QDAG, such + as malformed arguments/structure, undefined term accesses, singular vs + plural cardinality mismatches, or other errors that can be caught during + qualification. + """ + + +class PyDoughTypeException(Exception): + """ + Exception raised when there is an error relating to PyDough types, such + as malformed inputs to a parametrized type or a string that cannot be + parsed into a type. + """ diff --git a/pydough/metadata/errors.py b/pydough/errors/error_utils.py similarity index 97% rename from pydough/metadata/errors.py rename to pydough/errors/error_utils.py index c4dddb4cd..d2a97dd46 100644 --- a/pydough/metadata/errors.py +++ b/pydough/errors/error_utils.py @@ -1,5 +1,5 @@ """ -The definitions of error-handling utilities for the PyDough metadata module. +The definitions of error-handling utilities used by PyDough """ __all__ = [ @@ -13,7 +13,6 @@ "OrCondition", "PossiblyEmptyListOf", "PossiblyEmptyMapOf", - "PyDoughMetadataException", "PyDoughPredicate", "extract_array", "extract_bool", @@ -33,12 +32,7 @@ from abc import ABC, abstractmethod - -class PyDoughMetadataException(Exception): - """Exception raised when there is an error relating to PyDough metadata, such - as an error while parsing/validating the JSON or an ill-formed pattern. 
- """ - +from .error_types import PyDoughMetadataException ############################################################################### # Predicate Classes diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 5393c1861..8e16afa90 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -8,6 +8,7 @@ import pydough import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs +from pydough.errors import PyDoughQDAGException from pydough.metadata.abstract_metadata import AbstractMetadata from pydough.metadata.collections import CollectionMetadata, SimpleTableMetadata from pydough.metadata.graphs import GraphMetadata @@ -33,7 +34,6 @@ PyDoughCollectionQDAG, PyDoughExpressionQDAG, PyDoughQDAG, - PyDoughQDAGException, Reference, SubCollection, TableCollection, diff --git a/pydough/exploration/term.py b/pydough/exploration/term.py index f5ea0437f..4f6e62c20 100644 --- a/pydough/exploration/term.py +++ b/pydough/exploration/term.py @@ -10,6 +10,7 @@ import pydough import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs +from pydough.errors import PyDoughQDAGException from pydough.qdag import ( BackReferenceExpression, ChildReferenceExpression, @@ -18,7 +19,6 @@ PyDoughCollectionQDAG, PyDoughExpressionQDAG, PyDoughQDAG, - PyDoughQDAGException, Reference, ) from pydough.unqualified import ( diff --git a/pydough/metadata/__init__.py b/pydough/metadata/__init__.py index 980b07289..00b10dab9 100644 --- a/pydough/metadata/__init__.py +++ b/pydough/metadata/__init__.py @@ -8,7 +8,6 @@ "GeneralJoinMetadata", "GraphMetadata", "PropertyMetadata", - "PyDoughMetadataException", "SimpleJoinMetadata", "SimpleTableMetadata", "SubcollectionRelationshipMetadata", @@ -17,7 +16,6 @@ ] from .collections import CollectionMetadata, SimpleTableMetadata -from .errors import PyDoughMetadataException from .graphs import GraphMetadata from .parse import parse_json_metadata_from_file from 
.properties import ( diff --git a/pydough/metadata/collections/collection_metadata.py b/pydough/metadata/collections/collection_metadata.py index f059798ee..85f36ec15 100644 --- a/pydough/metadata/collections/collection_metadata.py +++ b/pydough/metadata/collections/collection_metadata.py @@ -4,13 +4,13 @@ from abc import abstractmethod -from pydough.metadata.abstract_metadata import AbstractMetadata -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasType, - PyDoughMetadataException, extract_string, is_valid_name, ) +from pydough.metadata.abstract_metadata import AbstractMetadata from pydough.metadata.graphs import GraphMetadata @@ -253,6 +253,7 @@ def add_properties_from_json(self, properties_json: list) -> None: scalar property that should be parsed and inserted into the collection. """ + from pydough.errors import PyDoughMetadataException from pydough.metadata.properties import TableColumnMetadata for property_json in properties_json: diff --git a/pydough/metadata/collections/simple_table_metadata.py b/pydough/metadata/collections/simple_table_metadata.py index 5dfd747ca..f568cea4e 100644 --- a/pydough/metadata/collections/simple_table_metadata.py +++ b/pydough/metadata/collections/simple_table_metadata.py @@ -3,10 +3,10 @@ table in a relational system. """ -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasPropertyWith, NoExtraKeys, - PyDoughMetadataException, extract_array, extract_object, extract_string, diff --git a/pydough/metadata/graphs/graph_metadata.py b/pydough/metadata/graphs/graph_metadata.py index 24ea12a14..6456f442b 100644 --- a/pydough/metadata/graphs/graph_metadata.py +++ b/pydough/metadata/graphs/graph_metadata.py @@ -2,8 +2,9 @@ Definition of PyDough metadata for a graph. 
""" +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import HasType, is_valid_name from pydough.metadata.abstract_metadata import AbstractMetadata -from pydough.metadata.errors import HasType, PyDoughMetadataException, is_valid_name class GraphMetadata(AbstractMetadata): diff --git a/pydough/metadata/parse.py b/pydough/metadata/parse.py index de44f0067..b42389d45 100644 --- a/pydough/metadata/parse.py +++ b/pydough/metadata/parse.py @@ -6,18 +6,19 @@ import json -from .collections import CollectionMetadata, SimpleTableMetadata -from .errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasPropertyWith, HasType, NoExtraKeys, - PyDoughMetadataException, extract_array, extract_bool, extract_string, is_json_object, is_string, ) + +from .collections import CollectionMetadata, SimpleTableMetadata from .graphs import GraphMetadata from .properties import ( CartesianProductMetadata, diff --git a/pydough/metadata/properties/cartesian_product_metadata.py b/pydough/metadata/properties/cartesian_product_metadata.py index cd2568f09..7f8fc45c2 100644 --- a/pydough/metadata/properties/cartesian_product_metadata.py +++ b/pydough/metadata/properties/cartesian_product_metadata.py @@ -6,14 +6,14 @@ __all__ = ["CartesianProductMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors.error_utils import ( NoExtraKeys, extract_array, extract_bool, extract_object, extract_string, ) +from pydough.metadata.collections import CollectionMetadata from pydough.metadata.graphs import GraphMetadata from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/general_join_metadata.py b/pydough/metadata/properties/general_join_metadata.py index b5eac06f8..1bbeb745b 100644 --- a/pydough/metadata/properties/general_join_metadata.py +++ b/pydough/metadata/properties/general_join_metadata.py @@ -6,14 +6,14 
@@ __all__ = ["GeneralJoinMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors.error_utils import ( NoExtraKeys, extract_array, extract_bool, extract_object, extract_string, ) +from pydough.metadata.collections import CollectionMetadata from pydough.metadata.graphs import GraphMetadata from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/property_metadata.py b/pydough/metadata/properties/property_metadata.py index 2ccc3010c..3baf90e69 100644 --- a/pydough/metadata/properties/property_metadata.py +++ b/pydough/metadata/properties/property_metadata.py @@ -6,12 +6,12 @@ from abc import abstractmethod -from pydough.metadata.abstract_metadata import AbstractMetadata -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors.error_utils import ( HasType, is_valid_name, ) +from pydough.metadata.abstract_metadata import AbstractMetadata +from pydough.metadata.collections import CollectionMetadata class PropertyMetadata(AbstractMetadata): diff --git a/pydough/metadata/properties/scalar_attribute_metadata.py b/pydough/metadata/properties/scalar_attribute_metadata.py index 09857fca2..9de7d0881 100644 --- a/pydough/metadata/properties/scalar_attribute_metadata.py +++ b/pydough/metadata/properties/scalar_attribute_metadata.py @@ -7,8 +7,8 @@ from abc import abstractmethod +from pydough.errors.error_utils import HasType from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import HasType from pydough.types import PyDoughType from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/simple_join_metadata.py b/pydough/metadata/properties/simple_join_metadata.py index e1be9ddca..3e267ddfd 100644 --- a/pydough/metadata/properties/simple_join_metadata.py +++ b/pydough/metadata/properties/simple_join_metadata.py @@ -6,17 +6,17 @@ __all__ = 
["SimpleJoinMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException +from pydough.errors.error_utils import ( HasPropertyWith, NoExtraKeys, - PyDoughMetadataException, extract_array, extract_bool, extract_object, extract_string, simple_join_keys_predicate, ) +from pydough.metadata.collections import CollectionMetadata from pydough.metadata.graphs import GraphMetadata from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/subcollection_relationship_metadata.py b/pydough/metadata/properties/subcollection_relationship_metadata.py index cf6d3a5fc..c5683a87b 100644 --- a/pydough/metadata/properties/subcollection_relationship_metadata.py +++ b/pydough/metadata/properties/subcollection_relationship_metadata.py @@ -7,8 +7,8 @@ from abc import abstractmethod +from pydough.errors.error_utils import HasType, is_bool from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import HasType, is_bool from .property_metadata import PropertyMetadata diff --git a/pydough/metadata/properties/table_column_metadata.py b/pydough/metadata/properties/table_column_metadata.py index 3b4c1e5a3..d2f234a0e 100644 --- a/pydough/metadata/properties/table_column_metadata.py +++ b/pydough/metadata/properties/table_column_metadata.py @@ -6,17 +6,16 @@ __all__ = ["TableColumnMetadata"] -from pydough.metadata.collections import CollectionMetadata -from pydough.metadata.errors import ( +from pydough.errors import PyDoughMetadataException, PyDoughTypeException +from pydough.errors.error_utils import ( NoExtraKeys, - PyDoughMetadataException, extract_array, extract_object, extract_string, is_string, ) +from pydough.metadata.collections import CollectionMetadata from pydough.types import PyDoughType, parse_type_from_string -from pydough.types.errors import PyDoughTypeException from .property_metadata import PropertyMetadata from 
.scalar_attribute_metadata import ScalarAttributeMetadata diff --git a/pydough/pydough_operators/base_operator.py b/pydough/pydough_operators/base_operator.py index 82ccfacf9..a7750c1a3 100644 --- a/pydough/pydough_operators/base_operator.py +++ b/pydough/pydough_operators/base_operator.py @@ -7,6 +7,8 @@ from abc import abstractmethod from typing import Any +from pydough.errors import PyDoughQDAGException + from .type_inference import TypeVerifier @@ -55,7 +57,6 @@ def verify_allows_args(self, args: list[Any]) -> None: `PyDoughQDAGException` if the operator does not accept the provided arguments. """ - from pydough.qdag.errors import PyDoughQDAGException try: self.verifier.accepts(args) diff --git a/pydough/pydough_operators/operator_registry.py b/pydough/pydough_operators/operator_registry.py index 9ea52ae85..2042bcaf7 100644 --- a/pydough/pydough_operators/operator_registry.py +++ b/pydough/pydough_operators/operator_registry.py @@ -6,6 +6,8 @@ import inspect +from pydough.errors import PyDoughUnqualifiedException + from .base_operator import PyDoughOperator from .expression_operators import ( ExpressionFunctionOperator, @@ -52,7 +54,6 @@ def get_operator_by_name(name: str, **kwargs) -> ExpressionFunctionOperator: keyword arguments, or if keyword arguments are provided for an operator that does not support them. 
""" - from pydough.unqualified import PyDoughUnqualifiedException # Find the operator directly using inspect for op_name, obj in inspect.getmembers(REP): diff --git a/pydough/pydough_operators/type_inference/type_verifier.py b/pydough/pydough_operators/type_inference/type_verifier.py index 453d361d9..59c51c960 100644 --- a/pydough/pydough_operators/type_inference/type_verifier.py +++ b/pydough/pydough_operators/type_inference/type_verifier.py @@ -14,6 +14,8 @@ from abc import ABC, abstractmethod from typing import Any +from pydough.errors import PyDoughQDAGException + class TypeVerifier(ABC): """ @@ -71,8 +73,6 @@ def num_args(self) -> int: return self._num_args def accepts(self, args: list[Any], error_on_fail: bool = True) -> bool: - from pydough.qdag.errors import PyDoughQDAGException - if len(args) != self.num_args: if error_on_fail: suffix = "argument" if self._num_args == 1 else "arguments" @@ -137,8 +137,6 @@ def high_range(self) -> int: return self._high_range def accepts(self, args: list[Any], error_on_fail: bool = True) -> bool: - from pydough.qdag.errors import PyDoughQDAGException - if not (self.low_range <= len(args) <= self.high_range): if error_on_fail: raise PyDoughQDAGException( @@ -157,7 +155,6 @@ class RequireCollection(TypeVerifier): def accepts(self, args: list[Any], error_on_fail: bool = True) -> bool: from pydough.qdag.collections import PyDoughCollectionQDAG - from pydough.qdag.errors import PyDoughQDAGException if len(args) != 1: if error_on_fail: diff --git a/pydough/qdag/__init__.py b/pydough/qdag/__init__.py index 673113ddc..809e7f1e0 100644 --- a/pydough/qdag/__init__.py +++ b/pydough/qdag/__init__.py @@ -37,6 +37,8 @@ "WindowCall", ] +from pydough.errors import PyDoughQDAGException + from .abstract_pydough_qdag import PyDoughQDAG from .collections import ( Calculate, @@ -56,7 +58,6 @@ TopK, Where, ) -from .errors import PyDoughQDAGException from .expressions import ( BackReferenceExpression, ChildReferenceExpression, diff --git 
a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index eef11e9b4..7d6f6cfd9 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -9,8 +9,8 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, PyDoughExpressionQDAG, diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index 80ac8f40c..7b0b06807 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,6 +8,7 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, PropertyMetadata, @@ -16,7 +17,6 @@ ) from pydough.metadata.properties import SubcollectionRelationshipMetadata from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index aad57fc8d..c91634d62 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -13,8 +13,8 @@ import numpy as np +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions.collation_expression import CollationExpression from pydough.qdag.expressions.expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index ceac79ac0..0be884f0b 100644 --- a/pydough/qdag/collections/global_context.py +++ 
b/pydough/qdag/collections/global_context.py @@ -7,12 +7,12 @@ __all__ = ["TableCollection"] +from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, GraphMetadata, ) from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import CollationExpression from .collection_qdag import PyDoughCollectionQDAG diff --git a/pydough/qdag/collections/order_by.py b/pydough/qdag/collections/order_by.py index b02ab8f90..0322f5310 100644 --- a/pydough/qdag/collections/order_by.py +++ b/pydough/qdag/collections/order_by.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.qdag.errors import PyDoughQDAGException +from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import CollationExpression from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index a2eb7477a..c2554b83f 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,8 +9,8 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, ChildReferenceExpression, diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index e0609e658..138dbcf3e 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.qdag.errors import PyDoughQDAGException +from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, diff --git a/pydough/qdag/collections/where.py b/pydough/qdag/collections/where.py index 
1c790a06d..85d81bb69 100644 --- a/pydough/qdag/collections/where.py +++ b/pydough/qdag/collections/where.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.qdag.errors import PyDoughQDAGException +from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import PyDoughExpressionQDAG from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite diff --git a/pydough/qdag/errors.py b/pydough/qdag/errors.py deleted file mode 100644 index f270f4f4e..000000000 --- a/pydough/qdag/errors.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -Definitions of the exception type used in the PyDough QDAG module. -""" - -__all__ = ["PyDoughQDAGException"] - - -class PyDoughQDAGException(Exception): - """Exception raised when there is an error relating to a PyDough QDAG, such - as malformed arguments/structure. - """ diff --git a/pydough/qdag/expressions/back_reference_expression.py b/pydough/qdag/expressions/back_reference_expression.py index 70498908c..e887541ec 100644 --- a/pydough/qdag/expressions/back_reference_expression.py +++ b/pydough/qdag/expressions/back_reference_expression.py @@ -4,8 +4,8 @@ """ __all__ = ["BackReferenceExpression"] +from pydough.errors import PyDoughQDAGException from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/expressions/child_reference_expression.py b/pydough/qdag/expressions/child_reference_expression.py index 5b1b7d15f..aeaa9e0de 100644 --- a/pydough/qdag/expressions/child_reference_expression.py +++ b/pydough/qdag/expressions/child_reference_expression.py @@ -8,9 +8,9 @@ from functools import cache +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from 
.expression_qdag import PyDoughExpressionQDAG from .reference import Reference diff --git a/pydough/qdag/expressions/column_property.py b/pydough/qdag/expressions/column_property.py index 39d77d2e4..5756f7f13 100644 --- a/pydough/qdag/expressions/column_property.py +++ b/pydough/qdag/expressions/column_property.py @@ -5,9 +5,9 @@ __all__ = ["ColumnProperty"] +from pydough.errors import PyDoughQDAGException from pydough.metadata.properties import TableColumnMetadata from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/expressions/reference.py b/pydough/qdag/expressions/reference.py index 5cc95597f..ebbb5c468 100644 --- a/pydough/qdag/expressions/reference.py +++ b/pydough/qdag/expressions/reference.py @@ -6,9 +6,9 @@ __all__ = ["Reference"] +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/expressions/sided_reference.py b/pydough/qdag/expressions/sided_reference.py index 6b9885d23..c8b150fcb 100644 --- a/pydough/qdag/expressions/sided_reference.py +++ b/pydough/qdag/expressions/sided_reference.py @@ -6,9 +6,9 @@ __all__ = ["SidedReference"] +from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG -from pydough.qdag.errors import PyDoughQDAGException from pydough.types import PyDoughType from .expression_qdag import PyDoughExpressionQDAG diff --git a/pydough/qdag/node_builder.py b/pydough/qdag/node_builder.py index 3bfd3e829..d20b26ad2 100644 --- 
a/pydough/qdag/node_builder.py +++ b/pydough/qdag/node_builder.py @@ -4,12 +4,11 @@ __all__ = ["AstNodeBuilder"] - +from pydough.errors import PyDoughMetadataException, PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, GraphMetadata, PropertyMetadata, - PyDoughMetadataException, TableColumnMetadata, ) from pydough.pydough_operators import ( @@ -33,7 +32,6 @@ TopK, Where, ) -from .errors import PyDoughQDAGException from .expressions import ( BackReferenceExpression, ChildReferenceExpression, diff --git a/pydough/types/array_type.py b/pydough/types/array_type.py index 445349e97..03da236cb 100644 --- a/pydough/types/array_type.py +++ b/pydough/types/array_type.py @@ -6,7 +6,8 @@ import re -from .errors import PyDoughTypeException +from pydough.errors import PyDoughTypeException + from .pydough_type import PyDoughType diff --git a/pydough/types/errors.py b/pydough/types/errors.py deleted file mode 100644 index c360c18cc..000000000 --- a/pydough/types/errors.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Error-handling definitions for the types module. -""" - -__all__ = ["PyDoughTypeException"] - - -class PyDoughTypeException(Exception): - """Exception raised when there is an error relating to PyDough types, such - as malformed inputs to a parametrized type or a string that cannot be - parsed into a type. 
- """ diff --git a/pydough/types/map_type.py b/pydough/types/map_type.py index aaf6a104a..6dd0480f7 100644 --- a/pydough/types/map_type.py +++ b/pydough/types/map_type.py @@ -6,7 +6,8 @@ import re -from .errors import PyDoughTypeException +from pydough.errors import PyDoughTypeException + from .pydough_type import PyDoughType diff --git a/pydough/types/parse_types.py b/pydough/types/parse_types.py index bcc741ff2..fa85e0734 100644 --- a/pydough/types/parse_types.py +++ b/pydough/types/parse_types.py @@ -5,10 +5,11 @@ __all__ = ["parse_type_from_string"] +from pydough.errors import PyDoughTypeException + from .array_type import ArrayType from .boolean_type import BooleanType from .datetime_type import DatetimeType -from .errors import PyDoughTypeException from .map_type import MapType from .numeric_type import NumericType from .pydough_type import PyDoughType diff --git a/pydough/types/struct_type.py b/pydough/types/struct_type.py index 95aba8152..7e7564bc1 100644 --- a/pydough/types/struct_type.py +++ b/pydough/types/struct_type.py @@ -6,7 +6,8 @@ import re -from .errors import PyDoughTypeException +from pydough.errors import PyDoughTypeException + from .pydough_type import PyDoughType diff --git a/pydough/unqualified/__init__.py b/pydough/unqualified/__init__.py index 39522204b..3219f1869 100644 --- a/pydough/unqualified/__init__.py +++ b/pydough/unqualified/__init__.py @@ -5,7 +5,6 @@ """ __all__ = [ - "PyDoughUnqualifiedException", "UnqualifiedAccess", "UnqualifiedBinaryOperation", "UnqualifiedCalculate", @@ -27,7 +26,6 @@ "transform_code", ] -from .errors import PyDoughUnqualifiedException from .qualification import qualify_node, qualify_term from .unqualified_node import ( UnqualifiedAccess, diff --git a/pydough/unqualified/errors.py b/pydough/unqualified/errors.py deleted file mode 100644 index 261cea209..000000000 --- a/pydough/unqualified/errors.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -Error handling definitions used for the unqualified module. 
-""" - -__all__ = ["PyDoughUnqualifiedException"] - - -class PyDoughUnqualifiedException(Exception): - """Exception raised when there is an error relating to the PyDough - unqualified form, such as a Python object that cannot be coerced. - """ diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index bfdaa4e6e..9d05dcca8 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -9,6 +9,7 @@ import pydough from pydough.configs import PyDoughConfigs +from pydough.errors import PyDoughUnqualifiedException from pydough.metadata import GeneralJoinMetadata, GraphMetadata from pydough.pydough_operators import get_operator_by_name from pydough.pydough_operators.expression_operators import ( @@ -40,7 +41,6 @@ ) from pydough.types import PyDoughType -from .errors import PyDoughUnqualifiedException from .unqualified_node import ( UnqualifiedAccess, UnqualifiedBest, diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 322a7ac2e..58256074e 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -27,8 +27,9 @@ from typing import Any, Union import pydough.pydough_operators as pydop +from pydough.errors import PyDoughUnqualifiedException +from pydough.errors.error_utils import is_bool, is_integer, is_positive_int, is_string from pydough.metadata import GraphMetadata -from pydough.metadata.errors import is_bool, is_integer, is_positive_int, is_string from pydough.pydough_operators import get_operator_by_name from pydough.types import ( ArrayType, @@ -40,8 +41,6 @@ UnknownType, ) -from .errors import PyDoughUnqualifiedException - class UnqualifiedNode(ABC): """ diff --git a/tests/test_metadata_errors.py b/tests/test_metadata_errors.py index 29c78c6a4..40ed05617 100644 --- a/tests/test_metadata_errors.py +++ b/tests/test_metadata_errors.py @@ -9,7 +9,8 @@ from pydough import parse_json_metadata_from_file from 
pydough.configs import PyDoughConfigs -from pydough.metadata import CollectionMetadata, GraphMetadata, PyDoughMetadataException +from pydough.errors import PyDoughMetadataException +from pydough.metadata import CollectionMetadata, GraphMetadata from pydough.unqualified import UnqualifiedNode, qualify_node, transform_code from tests.testing_utilities import graph_fetcher From 87284872bcc32fd65dc135110868569cd93d11c7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 10:38:52 -0400 Subject: [PATCH 02/97] Changed more errors to be PyDough exceptions --- pydough/configs/pydough_configs.py | 4 +- pydough/conversion/hybrid_translator.py | 3 +- pydough/conversion/hybrid_tree.py | 2 +- .../database_connectors/builtin_databases.py | 6 ++- .../database_connectors/database_connector.py | 9 ++-- .../database_connectors/empty_connection.py | 10 +++-- pydough/errors/__init__.py | 8 ++++ pydough/errors/error_types.py | 41 +++++++++++++++++-- pydough/evaluation/evaluate_unqualified.py | 14 ++++--- pydough/exploration/explain.py | 8 ++-- pydough/jupyter_extensions/pydough_magic.py | 3 +- .../expression_operators/binary_operators.py | 3 +- .../qdag/collections/collection_tree_form.py | 6 ++- pydough/sqlglot/execute_relational.py | 10 ++--- .../sqlglot_relational_expression_visitor.py | 5 ++- .../base_transform_bindings.py | 41 ++++++++++--------- .../sqlglot_transform_utils.py | 26 ++++++++---- .../sqlite_transform_bindings.py | 11 +++-- pydough/unqualified/unqualified_node.py | 2 +- tests/conftest.py | 3 +- tests/test_documentation.py | 3 +- tests/test_sqlite_connection.py | 9 ++-- tests/testing_utilities.py | 21 +++++++--- 23 files changed, 168 insertions(+), 80 deletions(-) diff --git a/pydough/configs/pydough_configs.py b/pydough/configs/pydough_configs.py index ea93b6de5..0d895cc53 100644 --- a/pydough/configs/pydough_configs.py +++ b/pydough/configs/pydough_configs.py @@ -7,6 +7,8 @@ from enum import Enum from typing import Any, Generic, TypeVar +from 
pydough.errors import PyDoughSessionException + T = TypeVar("T") @@ -126,5 +128,5 @@ class PyDoughConfigs: def __setattr__(self, name: str, value: Any) -> None: if name not in dir(self): - raise AttributeError(f"Unrecognized PyDough config name: {name}") + raise PyDoughSessionException(f"Unrecognized PyDough config name: {name}") super().__setattr__(name, value) diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index b376ce944..18e4e8de3 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -9,6 +9,7 @@ import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs from pydough.database_connectors import DatabaseDialect +from pydough.errors import PyDoughSQLException from pydough.metadata import ( CartesianProductMetadata, GeneralJoinMetadata, @@ -806,7 +807,7 @@ def rewrite_quantile_call( or not isinstance(expr.args[1].literal.value, (int, float)) or not (0.0 <= float(expr.args[1].literal.value) <= 1.0) ): - raise ValueError( + raise PyDoughSQLException( f"Expected second argument to QUANTILE to be a numeric literal between 0 and 1, instead received {expr.args[1]!r}" ) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 9a52f3e50..749eb3d71 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -605,7 +605,7 @@ def add_successor(self, successor: "HybridTree") -> None: `successor`: the HybridTree to be marked as one level below `self`. 
""" if self._successor is not None: - raise Exception("Duplicate successor") + raise ValueError("Duplicate successor") self._successor = successor successor._parent = self # Shift the aggregation keys and rhs of join keys back by 1 level to diff --git a/pydough/database_connectors/builtin_databases.py b/pydough/database_connectors/builtin_databases.py index 1fc483db4..29069b908 100644 --- a/pydough/database_connectors/builtin_databases.py +++ b/pydough/database_connectors/builtin_databases.py @@ -5,6 +5,8 @@ import sqlite3 +from pydough.errors import PyDoughSessionException + from .database_connector import DatabaseConnection, DatabaseContext, DatabaseDialect __all__ = ["load_database_context", "load_sqlite_connection"] @@ -31,7 +33,7 @@ def load_database_context(database_name: str, **kwargs) -> DatabaseContext: connection = load_sqlite_connection(**kwargs) dialect = DatabaseDialect.SQLITE case _: - raise ValueError( + raise PyDoughSessionException( f"Unsupported database: {database_name}. The supported databases are: {supported_databases}." "Any other database must be created manually by specifying the connection and dialect." ) @@ -47,6 +49,6 @@ def load_sqlite_connection(**kwargs) -> DatabaseConnection: DatabaseConnection: A database connection object for SQLite. """ if "database" not in kwargs: - raise ValueError("SQLite connection requires a database path.") + raise PyDoughSessionException("SQLite connection requires a database path.") connection: sqlite3.Connection = sqlite3.connect(**kwargs) return DatabaseConnection(connection) diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index 4a171a37b..aabcb4f7c 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -3,7 +3,8 @@ by leveraging PEP 249 (Python Database API Specification v2.0). https://peps.python.org/pep-0249/ """ -# Copyright (C) 2024 Bodo Inc. All rights reserved. 
+ +__all__ = ["DatabaseConnection", "DatabaseContext", "DatabaseDialect"] import sqlite3 from dataclasses import dataclass @@ -11,7 +12,7 @@ import pandas as pd -__all__ = ["DatabaseConnection", "DatabaseContext", "DatabaseDialect"] +from pydough.errors import PyDoughSessionException, PyDoughSQLException class DatabaseConnection: @@ -47,7 +48,7 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: cursor.execute(sql) except sqlite3.OperationalError as e: print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") - raise e + raise PyDoughSQLException(*e.args) from e column_names: list[str] = [description[0] for description in cursor.description] # No need to close the cursor, as its closed by del. # TODO: (gh #174) Cache the cursor? @@ -92,7 +93,7 @@ def from_string(dialect: str) -> "DatabaseDialect": elif dialect == "sqlite": return DatabaseDialect.SQLITE else: - raise ValueError(f"Unsupported dialect: {dialect}") + raise PyDoughSessionException(f"Unsupported dialect: {dialect}") @dataclass diff --git a/pydough/database_connectors/empty_connection.py b/pydough/database_connectors/empty_connection.py index 0629e2542..384c94d65 100644 --- a/pydough/database_connectors/empty_connection.py +++ b/pydough/database_connectors/empty_connection.py @@ -9,6 +9,8 @@ __all__ = ["empty_connection"] +from pydough.errors import PyDoughSessionException + from .database_connector import DatabaseConnection @@ -22,16 +24,16 @@ def __init__(self): pass def commit(self): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is specified.") def close(self): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is specified.") def rollback(self): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is specified.") def cursor(self, *args, **kwargs): - raise ValueError("No SQL Database is specified.") + raise PyDoughSessionException("No SQL Database is 
specified.") empty_connection: DatabaseConnection = DatabaseConnection(EmptyConnection()) diff --git a/pydough/errors/__init__.py b/pydough/errors/__init__.py index 1a89f4ceb..ea9668f25 100644 --- a/pydough/errors/__init__.py +++ b/pydough/errors/__init__.py @@ -3,15 +3,23 @@ """ __all__ = [ + "PyDoughException", "PyDoughMetadataException", "PyDoughQDAGException", + "PyDoughSQLException", + "PyDoughSessionException", + "PyDoughTestingException", "PyDoughTypeException", "PyDoughUnqualifiedException", ] from .error_types import ( + PyDoughException, PyDoughMetadataException, PyDoughQDAGException, + PyDoughSessionException, + PyDoughSQLException, + PyDoughTestingException, PyDoughTypeException, PyDoughUnqualifiedException, ) diff --git a/pydough/errors/error_types.py b/pydough/errors/error_types.py index 5951bb2d0..d0f743259 100644 --- a/pydough/errors/error_types.py +++ b/pydough/errors/error_types.py @@ -3,21 +3,40 @@ """ __all__ = [ + "PyDoughException", "PyDoughMetadataException", "PyDoughQDAGException", + "PyDoughSQLException", + "PyDoughSessionException", + "PyDoughTestingException", "PyDoughTypeException", "PyDoughUnqualifiedException", ] -class PyDoughMetadataException(Exception): +class PyDoughException(Exception): + """ + Base class for all PyDough exceptions. + """ + + +class PyDoughSessionException(PyDoughException): + """ + Exception raised when something goes wrong with the PyDough session or + configs, such as assigning to a configuration that does not exist, or + not mounting a graph or database to the session when they are needed, + or issues with the setup of the database. + """ + + +class PyDoughMetadataException(PyDoughException): """ Exception raised when there is an error relating to PyDough metadata, such as an error while parsing/validating the JSON or an ill-formed pattern. 
""" -class PyDoughUnqualifiedException(Exception): +class PyDoughUnqualifiedException(PyDoughException): """ Exception raised when there is an error relating to the PyDough unqualified form, such as a Python object that cannot be coerced or an @@ -25,7 +44,7 @@ class PyDoughUnqualifiedException(Exception): """ -class PyDoughQDAGException(Exception): +class PyDoughQDAGException(PyDoughException): """ Exception raised when there is an error relating to a PyDough QDAG, such as malformed arguments/structure, undefined term accesses, singular vs @@ -34,9 +53,23 @@ class PyDoughQDAGException(Exception): """ -class PyDoughTypeException(Exception): +class PyDoughTypeException(PyDoughException): """ Exception raised when there is an error relating to PyDough types, such as malformed inputs to a parametrized type or a string that cannot be parsed into a type. """ + + +class PyDoughSQLException(PyDoughException): + """ + Exception caused by a malformation in the SQL that causes bugs during SQL + generation, SQL rewrites/optimization or, or errors during SQL execution. + """ + + +class PyDoughTestingException(PyDoughException): + """ + Exception raised within PyDough testing logic to indicate that something + has gone wrong, e.g. when the AstNodeTestInfo classes are used incorrectly. 
+ """ diff --git a/pydough/evaluation/evaluate_unqualified.py b/pydough/evaluation/evaluate_unqualified.py index 0b17c3450..819f44882 100644 --- a/pydough/evaluation/evaluate_unqualified.py +++ b/pydough/evaluation/evaluate_unqualified.py @@ -12,6 +12,10 @@ from pydough.configs import PyDoughConfigs from pydough.conversion import convert_ast_to_relational from pydough.database_connectors import DatabaseContext +from pydough.errors import ( + PyDoughQDAGException, + PyDoughSessionException, +) from pydough.metadata import GraphMetadata from pydough.qdag import PyDoughCollectionQDAG, PyDoughQDAG from pydough.relational import RelationalRoot @@ -42,7 +46,7 @@ def _load_session_info( metadata = kwargs.pop("metadata") else: if pydough.active_session.metadata is None: - raise ValueError( + raise PyDoughSessionException( "Cannot evaluate Pydough without a metadata graph. " "Please call `pydough.active_session.load_metadata_graph()`." ) @@ -94,11 +98,11 @@ def _load_column_selection(kwargs: dict[str, object]) -> list[tuple[str, str]] | ) result.append((alias, column)) else: - raise TypeError( + raise PyDoughQDAGException( f"Expected `columns` argument to be a list or dictionary, found {columns_arg.__class__.__name__}" ) if len(result) == 0: - raise ValueError("Column selection must not be empty") + raise PyDoughQDAGException("Column selection must not be empty") return result @@ -124,7 +128,7 @@ def to_sql(node: UnqualifiedNode, **kwargs) -> str: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise TypeError( + raise PyDoughQDAGException( f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" ) relational: RelationalRoot = convert_ast_to_relational( @@ -157,7 +161,7 @@ def to_df(node: UnqualifiedNode, **kwargs) -> pd.DataFrame: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = 
qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise TypeError( + raise PyDoughQDAGException( f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" ) relational: RelationalRoot = convert_ast_to_relational( diff --git a/pydough/exploration/explain.py b/pydough/exploration/explain.py index 8e16afa90..045bb1f02 100644 --- a/pydough/exploration/explain.py +++ b/pydough/exploration/explain.py @@ -130,7 +130,7 @@ def explain_property(property: PropertyMetadata, verbose: bool) -> str: for cond_str in conditions: lines.append(f" {cond_str}") case _: - raise ValueError( + raise NotImplementedError( f"Unrecognized type of property: {property.__class__.__name__}" ) else: @@ -138,7 +138,7 @@ def explain_property(property: PropertyMetadata, verbose: bool) -> str: f"Use pydough.explain(graph['{collection_name}']['{property_name}'], verbose=True) to learn more details." ) case _: - raise ValueError( + raise NotImplementedError( f"Unrecognized type of property: {property.__class__.__name__}" ) return "\n".join(lines) @@ -183,7 +183,7 @@ def explain_collection(collection: CollectionMetadata, verbose: bool) -> str: f"Unique properties of collection: {collection.unique_properties}" ) else: - raise ValueError( + raise NotImplementedError( f"Unrecognized type of collection: {collection.__class__.__name__}" ) if len(scalar_properties) == 0: @@ -527,6 +527,6 @@ def explain( case UnqualifiedNode(): return explain_unqualified(data, verbose) case _: - raise ValueError( + raise NotImplementedError( f"Cannot call pydough.explain on argument of type {data.__class__.__name__}" ) diff --git a/pydough/jupyter_extensions/pydough_magic.py b/pydough/jupyter_extensions/pydough_magic.py index e422ebc8b..9b9184118 100644 --- a/pydough/jupyter_extensions/pydough_magic.py +++ b/pydough/jupyter_extensions/pydough_magic.py @@ -10,6 +10,7 @@ ) import pydough +from pydough.errors import PyDoughSessionException from pydough.metadata 
import GraphMetadata from pydough.unqualified import transform_cell @@ -33,7 +34,7 @@ def pydough(self, line="", cell="", local_ns=None): cell = self.shell.var_expand(cell) graph: GraphMetadata | None = pydough.active_session.metadata if graph is None: - raise Exception( + raise PyDoughSessionException( "No active graph set in PyDough session." " Please set a graph using" " pydough.active_session.load_metadata_graph(...)" diff --git a/pydough/pydough_operators/expression_operators/binary_operators.py b/pydough/pydough_operators/expression_operators/binary_operators.py index eeed8319b..2fb74135b 100644 --- a/pydough/pydough_operators/expression_operators/binary_operators.py +++ b/pydough/pydough_operators/expression_operators/binary_operators.py @@ -6,6 +6,7 @@ from enum import Enum +from pydough.errors import PyDoughQDAGException from pydough.pydough_operators.type_inference import ( ExpressionTypeDeducer, TypeVerifier, @@ -43,7 +44,7 @@ def from_string(s: str) -> "BinOp": for op in BinOp.__members__.values(): if s == op.value: return op - raise ValueError(f"Unrecognized operation: {s!r}") + raise PyDoughQDAGException(f"Unrecognized operation: {s!r}") BinOp.__members__.items() diff --git a/pydough/qdag/collections/collection_tree_form.py b/pydough/qdag/collections/collection_tree_form.py index d86ea7f82..be3c533ae 100644 --- a/pydough/qdag/collections/collection_tree_form.py +++ b/pydough/qdag/collections/collection_tree_form.py @@ -8,6 +8,8 @@ from typing import Union +from pydough.errors import PyDoughQDAGException + class CollectionTreeForm: """ @@ -82,7 +84,7 @@ def to_string_rows(self) -> list[str]: case (True, True): answer = [f"{self.ROOT_PARENT_PREDECESSOR} {self.item_str}"] case _: - raise Exception("Malformed collection tree form") + raise PyDoughQDAGException("Malformed collection tree form") else: answer = ( [] if self.predecessor is None else self.predecessor.to_string_rows() @@ -97,7 +99,7 @@ def to_string_rows(self) -> list[str]: case (True, True): 
answer.append(f"{prefix}{self.SUCCESSOR_PARENT} {self.item_str}") case _: - raise Exception("Malformed collection tree form") + raise PyDoughQDAGException("Malformed collection tree form") new_prefix: str = f"{prefix}{self.CHILD_SPACER if self.has_successor else self.PREDECESSOR_SPACER}" for idx, child in enumerate(self.nested_trees): is_last_child: bool = idx == len(self.nested_trees) - 1 diff --git a/pydough/sqlglot/execute_relational.py b/pydough/sqlglot/execute_relational.py index 0b569dad9..ac204f492 100644 --- a/pydough/sqlglot/execute_relational.py +++ b/pydough/sqlglot/execute_relational.py @@ -29,6 +29,7 @@ DatabaseContext, DatabaseDialect, ) +from pydough.errors import PyDoughSQLException from pydough.logger import get_logger from pydough.relational import RelationalRoot from pydough.relational.relational_expressions import ( @@ -66,10 +67,9 @@ def convert_relation_to_sql( try: glot_expr = apply_sqlglot_optimizer(glot_expr, relational, sqlglot_dialect) except SqlglotError as e: - print( - f"ERROR WHILE OPTIMIZING QUERY:\n{glot_expr.sql(sqlglot_dialect, pretty=True)}" - ) - raise e + sql_text: str = glot_expr.sql(sqlglot_dialect, pretty=True) + print(f"ERROR WHILE OPTIMIZING QUERY:\n{sql_text}") + raise PyDoughSQLException(*e.args) # Convert the optimized AST back to a SQL string. 
return glot_expr.sql(sqlglot_dialect, pretty=True) @@ -268,7 +268,7 @@ def convert_dialect_to_sqlglot(dialect: DatabaseDialect) -> SQLGlotDialect: elif dialect == DatabaseDialect.SQLITE: return SQLiteDialect() else: - raise ValueError(f"Unsupported dialect: {dialect}") + raise NotImplementedError(f"Unsupported dialect: {dialect}") def execute_df( diff --git a/pydough/sqlglot/sqlglot_relational_expression_visitor.py b/pydough/sqlglot/sqlglot_relational_expression_visitor.py index 943c4ac48..f27c2c392 100644 --- a/pydough/sqlglot/sqlglot_relational_expression_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_expression_visitor.py @@ -13,6 +13,7 @@ from pydough.configs import PyDoughConfigs from pydough.database_connectors import DatabaseDialect +from pydough.errors import PyDoughSQLException from pydough.relational import ( CallExpression, ColumnReference, @@ -218,7 +219,7 @@ def visit_window_expression(self, window_expression: WindowCallExpression) -> No case "PREV" | "NEXT": offset = window_expression.kwargs.get("n", 1) if not isinstance(offset, int): - raise ValueError( + raise PyDoughSQLException( f"Invalid 'n' argument to {window_expression.op.function_name}: {offset!r} (expected an integer)" ) # By default, we use the LAG function. 
If doing NEXT, switch @@ -288,7 +289,7 @@ def visit_literal_expression(self, literal_expression: LiteralExpression) -> Non if isinstance(literal_expression.value, datetime.datetime): dt: datetime.datetime = literal_expression.value if dt.tzinfo is not None: - raise ValueError( + raise PyDoughSQLException( "PyDough does not yet support datetime values with a timezone" ) literal = sqlglot_expressions.Cast( diff --git a/pydough/sqlglot/transform_bindings/base_transform_bindings.py b/pydough/sqlglot/transform_bindings/base_transform_bindings.py index 34dd64be3..eef8faa80 100644 --- a/pydough/sqlglot/transform_bindings/base_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/base_transform_bindings.py @@ -13,6 +13,7 @@ import pydough.pydough_operators as pydop from pydough.configs import DayOfWeek, PyDoughConfigs +from pydough.errors import PyDoughSQLException from pydough.types import BooleanType, NumericType, PyDoughType, StringType from .sqlglot_transform_utils import ( @@ -575,11 +576,11 @@ def convert_slice( try: start_idx = int(start.this) except ValueError: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the start index being integer literal or absent." ) else: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the start index being integer literal or absent." ) @@ -589,11 +590,11 @@ def convert_slice( try: stop_idx = int(stop.this) except ValueError: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the stop index being integer literal or absent." ) else: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the stop index being integer literal or absent." ) @@ -603,15 +604,15 @@ def convert_slice( try: step_idx = int(step.this) if step_idx != 1: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the step being integer literal 1 or absent." 
) except ValueError: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the step being integer literal 1 or absent." ) else: - raise ValueError( + raise PyDoughSQLException( "SLICE function currently only supports the step being integer literal 1 or absent." ) @@ -622,7 +623,7 @@ def convert_slice( match (start_idx, stop_idx): case (None, None): - raise string_expr + return string_expr case (_, None): assert start_idx is not None if start_idx > 0: @@ -1207,14 +1208,14 @@ def convert_round( not isinstance(args[1], sqlglot_expressions.Literal) or args[1].is_string ): - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[1]} for ROUND." "The precision argument should be an integer literal." ) try: int(args[1].this) except ValueError: - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[1]} for ROUND." "The precision argument should be an integer literal." ) @@ -1281,14 +1282,14 @@ def convert_datediff( assert len(args) == 3 # Check if unit is a string. if not (isinstance(args[0], sqlglot_expressions.Literal) and args[0].is_string): - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument for DATEDIFF: {args[0]!r}. It should be a string literal." ) x = self.make_datetime_arg(args[1]) y = self.make_datetime_arg(args[2]) unit: DateTimeUnit | None = DateTimeUnit.from_string(args[0].this) if unit is None: - raise ValueError(f"Unsupported argument '{unit}' for DATEDIFF.") + raise PyDoughSQLException(f"Unsupported argument '{unit}' for DATEDIFF.") answer = sqlglot_expressions.DateDiff( unit=sqlglot_expressions.Var(this=unit.value), this=y, expression=x ) @@ -1407,7 +1408,7 @@ def convert_datetime( # truncation. 
unit = DateTimeUnit.from_string(str(trunc_match.group(1))) if unit is None: - raise ValueError( + raise PyDoughSQLException( f"Unsupported DATETIME modifier string: {arg.this!r}" ) result = self.apply_datetime_truncation(result, unit) @@ -1419,12 +1420,14 @@ def convert_datetime( amt *= -1 unit = DateTimeUnit.from_string(str(offset_match.group(3))) if unit is None: - raise ValueError( + raise PyDoughSQLException( f"Unsupported DATETIME modifier string: {arg.this!r}" ) result = self.apply_datetime_offset(result, amt, unit) else: - raise ValueError(f"Unsupported DATETIME modifier string: {arg.this!r}") + raise PyDoughSQLException( + f"Unsupported DATETIME modifier string: {arg.this!r}" + ) return result def convert_extract_datetime( @@ -1618,7 +1621,7 @@ def convert_string( not isinstance(args[1], sqlglot_expressions.Literal) or not args[1].is_string ): - raise ValueError( + raise PyDoughSQLException( f"STRING(X,Y) requires the second argument to be a string date format literal, but received {args[1]}" ) return sqlglot_expressions.TimeToStr(this=args[0], format=args[1]) @@ -1730,7 +1733,7 @@ def convert_count( elif len(args) == 1: return sqlglot_expressions.Count(this=args[0]) else: - raise ValueError(f"COUNT expects 0 or 1 argument, got {len(args)}") + raise PyDoughSQLException(f"COUNT expects 0 or 1 argument, got {len(args)}") def convert_quantile( self, args: list[SQLGlotExpression], types: list[PyDoughType] @@ -1762,8 +1765,8 @@ def convert_quantile( or args[1].is_string or not (0.0 <= float(args[1].this) <= 1.0) ): - raise ValueError( - f"QUANTILE TEST argument to be a numeric literal between 0 and 1, got {args[1]}" + raise PyDoughSQLException( + f"QUANTILE expected second argument to be a numeric literal between 0 and 1, got {args[1]}" ) percentile_disc_function: SQLGlotExpression = ( diff --git a/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py b/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py index f1c74f08a..bce06a33f 100644 --- 
a/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py +++ b/pydough/sqlglot/transform_bindings/sqlglot_transform_utils.py @@ -21,6 +21,8 @@ from sqlglot.expressions import Binary, Case, Concat, Is, Paren, Unary from sqlglot.expressions import Expression as SQLGlotExpression +from pydough.errors import PyDoughSQLException + PAREN_EXPRESSIONS = (Binary, Unary, Concat, Is, Case) """ The types of SQLGlot expressions that need to be wrapped in parenthesis for the @@ -165,11 +167,15 @@ def truncation_string(self) -> str: case DateTimeUnit.YEAR: return "'%Y-01-01 00:00:00'" case DateTimeUnit.QUARTER: - raise ValueError("Quarter unit does not have a truncation string.") + raise PyDoughSQLException( + "Quarter unit does not have a truncation string." + ) case DateTimeUnit.MONTH: return "'%Y-%m-01 00:00:00'" case DateTimeUnit.WEEK: - raise ValueError("Week unit does not have a truncation string.") + raise PyDoughSQLException( + "Week unit does not have a truncation string." + ) case DateTimeUnit.DAY: return "'%Y-%m-%d 00:00:00'" case DateTimeUnit.HOUR: @@ -188,7 +194,9 @@ def extraction_string(self) -> str: case DateTimeUnit.YEAR: return "'%Y'" case DateTimeUnit.QUARTER: - raise ValueError("Quarter unit does not have an extraction string.") + raise PyDoughSQLException( + "Quarter unit does not have an extraction string." + ) case DateTimeUnit.MONTH: return "'%m'" case DateTimeUnit.WEEK: @@ -257,22 +265,24 @@ def pad_helper( try: required_len = int(args[1].this) if required_len < 0: - raise ValueError() + raise PyDoughSQLException( + f"{pad_func} function requires the length argument to be a non-negative integer literal." + ) except ValueError: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the length argument to be a non-negative integer literal." ) else: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the length argument to be a non-negative integer literal." 
) if not isinstance(args[2], sqlglot_expressions.Literal) or not args[2].is_string: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the padding argument to be a string literal of length 1." ) if len(str(args[2].this)) != 1: - raise ValueError( + raise PyDoughSQLException( f"{pad_func} function requires the padding argument to be a string literal of length 1." ) diff --git a/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py b/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py index c3de15b5a..402030443 100644 --- a/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/sqlite_transform_bindings.py @@ -10,6 +10,7 @@ from sqlglot.expressions import Expression as SQLGlotExpression import pydough.pydough_operators as pydop +from pydough.errors import PyDoughSQLException from pydough.types import DatetimeType, NumericType, PyDoughType, StringType from .base_transform_bindings import BaseTransformBindings @@ -84,11 +85,11 @@ def convert_datediff( ) -> SQLGlotExpression: assert len(args) == 3 if not isinstance(args[0], sqlglot_expressions.Literal): - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[0]} for DATEDIFF.It should be a string." ) elif not args[0].is_string: - raise ValueError( + raise PyDoughSQLException( f"Unsupported argument {args[0]} for DATEDIFF.It should be a string." ) unit: DateTimeUnit | None = DateTimeUnit.from_string(args[0].this) @@ -291,7 +292,9 @@ def convert_datediff( ) return secs_diff case _: - raise ValueError(f"Unsupported argument '{unit}' for DATEDIFF.") + raise PyDoughSQLException( + f"Unsupported argument '{unit}' for DATEDIFF." 
+ ) def convert_quarter( self, @@ -580,7 +583,7 @@ def convert_variance( ) ) else: - raise ValueError(f"Unsupported type: {type}") + raise PyDoughSQLException(f"Unsupported typ for variance/std: {type}") def convert_std( self, args: list[SQLGlotExpression], types: list[PyDoughType], type: str diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 58256074e..bf41f0e6c 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -108,7 +108,7 @@ def __setattr__(self, name: str, value: object) -> None: super().__setattr__(name, value) else: # TODO: support using setattr to add/mutate properties. - raise AttributeError( + raise PyDoughUnqualifiedException( "PyDough objects do not yet support writing properties to them." ) diff --git a/tests/conftest.py b/tests/conftest.py index b9a7a6fb3..95a050e2c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ DatabaseDialect, empty_connection, ) +from pydough.errors import PyDoughTestingException from pydough.metadata.graphs import GraphMetadata from pydough.qdag import AstNodeBuilder from tests.testing_utilities import graph_fetcher @@ -121,7 +122,7 @@ def get_sample_graph( @cache def impl(name: str) -> GraphMetadata: if name not in valid_sample_graph_names: - raise Exception(f"Unrecognized graph name '{name}'") + raise PyDoughTestingException(f"Unrecognized graph name '{name}'") return pydough.parse_json_metadata_from_file( file_path=sample_graph_path, graph_name=name ) diff --git a/tests/test_documentation.py b/tests/test_documentation.py index 83fc7b113..6265f4c47 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -3,6 +3,7 @@ """ import pydough.pydough_operators as pydop +from pydough.errors import PyDoughTestingException def test_function_list(): @@ -34,6 +35,6 @@ def test_function_list(): # any that remain function_names.difference_update(headers) if function_names: - raise Exception( + 
raise PyDoughTestingException( "The following functions are not documented: " + ", ".join(function_names) ) diff --git a/tests/test_sqlite_connection.py b/tests/test_sqlite_connection.py index 9dea66157..4f5a2995b 100644 --- a/tests/test_sqlite_connection.py +++ b/tests/test_sqlite_connection.py @@ -13,6 +13,7 @@ DatabaseDialect, load_database_context, ) +from pydough.errors import PyDoughSessionException def test_query_execution(sqlite_people_jobs: DatabaseConnection) -> None: @@ -56,7 +57,9 @@ def test_sqlite_context_no_path() -> None: """ Test that we error if a Database path is not provided. """ - with pytest.raises(ValueError, match="SQLite connection requires a database path."): + with pytest.raises( + PyDoughSessionException, match="SQLite connection requires a database path." + ): load_database_context("sqlite") @@ -64,7 +67,7 @@ def test_sqlite_context_wrong_name() -> None: """ Test that we error if the database name is incorrect. """ - with pytest.raises(ValueError, match="Unsupported database: sqlite3"): + with pytest.raises(PyDoughSessionException, match="Unsupported database: sqlite3"): load_database_context("sqlite3", database=":memory:") @@ -94,5 +97,5 @@ def test_unsupported_database() -> None: TODO: Remove when we support mysql or move to a more generic file. 
""" - with pytest.raises(ValueError): + with pytest.raises(PyDoughSessionException): load_database_context("mysql", database=":memory:") diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 6f2c18507..245288119 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -37,6 +37,7 @@ from pydough.configs import PyDoughConfigs from pydough.conversion import convert_ast_to_relational from pydough.database_connectors import DatabaseContext +from pydough.errors import PyDoughTestingException from pydough.evaluation.evaluate_unqualified import _load_column_selection from pydough.metadata import GraphMetadata from pydough.pydough_operators import get_operator_by_name @@ -241,7 +242,9 @@ def to_string(self) -> str: case "RANKING": return f"{self.name}(by=({', '.join(collation_strings)}), levels={self.levels}{kwargs_str})" case _: - raise Exception(f"Unsupported window function {self.name}") + raise PyDoughTestingException( + f"Unsupported window function {self.name}" + ) def build( self, @@ -267,7 +270,9 @@ def build( self.kwargs, ) case _: - raise Exception(f"Unsupported window function {self.name}") + raise PyDoughTestingException( + f"Unsupported window function {self.name}" + ) class ReferenceInfo(AstNodeTestInfo): @@ -657,7 +662,9 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception("Must provide a context when building a WHERE clause.") + raise PyDoughTestingException( + "Must provide a context when building a WHERE clause." 
+ ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) raw_where: Where = builder.build_where(context, children) cond = self.condition.build(builder, context, children) @@ -689,7 +696,9 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception("Must provide a context when building a Singular clause.") + raise PyDoughTestingException( + "Must provide a context when building a Singular clause." + ) raw_singular: Singular = builder.build_singular(context) return raw_singular @@ -727,7 +736,7 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception( + raise PyDoughTestingException( "Must provide context and children_contexts when building an ORDER BY clause." ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) @@ -776,7 +785,7 @@ def local_build( children_contexts: list[PyDoughCollectionQDAG] | None = None, ) -> PyDoughCollectionQDAG: if context is None: - raise Exception( + raise PyDoughTestingException( "Must provide context and children_contexts when building a TOPK clause." 
) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) From 675421c074789b0a74fb8226274dc2d767880a15 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 11:01:57 -0400 Subject: [PATCH 03/97] Added error builder class and integrated for term not found errors --- pydough/configs/session.py | 23 ++++++ pydough/errors/__init__.py | 2 + pydough/errors/pydough_error_builder.py | 37 ++++++++++ pydough/qdag/collections/calculate.py | 5 +- pydough/qdag/collections/collection_access.py | 7 +- pydough/qdag/collections/collection_qdag.py | 72 ++++++++++++------- pydough/qdag/collections/global_context.py | 5 +- pydough/qdag/collections/partition_by.py | 3 +- pydough/qdag/collections/partition_child.py | 8 +-- 9 files changed, 125 insertions(+), 37 deletions(-) create mode 100644 pydough/errors/pydough_error_builder.py diff --git a/pydough/configs/session.py b/pydough/configs/session.py index 128a4a145..ac197e5cd 100644 --- a/pydough/configs/session.py +++ b/pydough/configs/session.py @@ -5,6 +5,7 @@ - The active metadata graph. - Any PyDough configuration for function behavior. - Backend information (SQL dialect, Database connection, etc.) 
+- The error builder used to create and format exceptions In the future this session will also contain other information such as any User Defined registration for additional backend @@ -24,6 +25,7 @@ empty_connection, load_database_context, ) +from pydough.errors import PyDoughErrorBuilder from pydough.metadata import GraphMetadata, parse_json_metadata_from_file from .pydough_configs import PyDoughConfigs @@ -47,6 +49,7 @@ def __init__(self) -> None: self._database: DatabaseContext = DatabaseContext( connection=empty_connection, dialect=DatabaseDialect.ANSI ) + self._error_builder: PyDoughErrorBuilder = PyDoughErrorBuilder() @property def metadata(self) -> GraphMetadata | None: @@ -108,6 +111,26 @@ def database(self, context: DatabaseContext) -> None: """ self._database = context + @property + def error_builder(self) -> PyDoughErrorBuilder: + """ + Get the active error builder. + + Returns: + The active error builder. + """ + return self._error_builder + + @error_builder.setter + def error_builder(self, builder: PyDoughErrorBuilder) -> None: + """ + Set the active error builder context. + + Args: + The error builder to set. + """ + self._error_builder = builder + def connect_database(self, database_name: str, **kwargs) -> DatabaseContext: """ Create a new DatabaseContext and register it in the session. 
This returns diff --git a/pydough/errors/__init__.py b/pydough/errors/__init__.py index ea9668f25..2fc116db0 100644 --- a/pydough/errors/__init__.py +++ b/pydough/errors/__init__.py @@ -3,6 +3,7 @@ """ __all__ = [ + "PyDoughErrorBuilder", "PyDoughException", "PyDoughMetadataException", "PyDoughQDAGException", @@ -23,3 +24,4 @@ PyDoughTypeException, PyDoughUnqualifiedException, ) +from .pydough_error_builder import PyDoughErrorBuilder diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py new file mode 100644 index 000000000..cd6f423e0 --- /dev/null +++ b/pydough/errors/pydough_error_builder.py @@ -0,0 +1,37 @@ +""" +Definition of the base class for creating exceptions in PyDough. +""" + +from typing import TYPE_CHECKING + +from pydough.errors import PyDoughException, PyDoughQDAGException + +if TYPE_CHECKING: + from pydough.qdag import PyDoughCollectionQDAG + + +class PyDoughErrorBuilder: + """ + Base class for creating exceptions in PyDough. This class provides an + interface that the internals of PyDough will call to create various + exceptions. An instance of this class is installed in the PyDough active + session, telling PyDough how to create exceptions and what their messages + should contain for most situations. A subclass can be created and installed + into the session to customize the error messages. + """ + + def term_not_found( + self, collection: "PyDoughCollectionQDAG", term_name: str + ) -> PyDoughException: + """ + Creates an exception for when a term is not found in the specified collection. + + Args: + `collection`: The collection in which the term was not found. + `term_name` The name of the term that was not found. + Returns: + An exception indicating that the term was not found. 
+ """ + return PyDoughQDAGException( + collection.name_mismatch_error(term_name, atol=2, rtol=0.1, min_names=3) + ) diff --git a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index 7d6f6cfd9..661e4f7db 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -9,6 +9,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -147,10 +148,10 @@ def get_expression_position(self, expr_name: str) -> int: return self.calc_term_indices[expr_name] def get_term(self, term_name: str) -> PyDoughQDAG: # type: ignore + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) if term_name in self.calc_term_values: return self.calc_term_values[term_name] - elif term_name not in self.all_terms: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) return super().get_term(term_name) diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index 7b0b06807..b4dcd1bec 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,6 +8,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -107,6 +108,9 @@ def get_expression_position(self, expr_name: str) -> int: @cache def get_term(self, term_name: str) -> PyDoughQDAG: + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) + # Special handling of terms down-streamed from an ancestor CALCULATE # clause. 
if term_name in self.ancestral_mapping: @@ -131,9 +135,6 @@ def get_term(self, term_name: str) -> PyDoughQDAG: context = context.ancestor_context return Reference(context, term_name) - if term_name not in self.all_terms: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) - return self.get_term_from_property(term_name) def get_term_from_property(self, term_name: str) -> PyDoughQDAG: diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index c91634d62..3c8360a30 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -5,7 +5,6 @@ __all__ = ["PyDoughCollectionQDAG"] -import re from abc import abstractmethod from collections.abc import Iterable from functools import cache, cached_property @@ -362,16 +361,25 @@ def to_tree_string(self) -> str: """ return "\n".join(self.to_tree_form(True).to_string_rows()) - def find_possible_name_matches(self, term_name: str) -> list[str]: + def find_possible_name_matches( + self, term_name: str, atol: int, rtol: float, min_names: int + ) -> list[str]: """ Finds and returns a list of candidate names that closely match the given name based on minimum edit distance. Args: - name (str): The name to match against the list of candidates. + `term_name`: The name to match against the list of candidates. + `atol`: The absolute tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match + atol` will be included in the results. + `rtol`: The relative tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match * (1 + rtol)` will be included in the results. + `min_names`: The minimum number of names to return. Returns: - list[str]: A list of candidate names, based on the closest matches. + A list of candidate names, based on the closest matches. 
""" terms_distance_list: list[tuple[float, str]] = [] @@ -388,27 +396,25 @@ def find_possible_name_matches(self, term_name: str) -> list[str]: closest_match = terms_distance_list[0] - # List with all names that have a me <= closest_match + 2 - matches_within_2: list[str] = [] - # List with all names that have a me <= closest_match * 1.1 - matches_within_10_pct: list[str] = [] - # List with the top 3 closest matches (me) breaking ties by name - matches_top_3: list[str] = [name for _, name in terms_distance_list[:3]] + # List with all names that have a me <= closest_match + atol + matches_within_atol: list[str] = [ + name for me, name in terms_distance_list if me <= closest_match[0] + atol + ] - # filtering the result - for me, name in terms_distance_list: - # all names that have a me <= closest_match + 2 - if me <= closest_match[0] + 2: - matches_within_2.append(name) + # List with all names that have a me <= closest_match * 1.1 + matches_within_rtol: list[str] = [ + name + for me, name in terms_distance_list + if me <= closest_match[0] * (1 + rtol) + ] - # all names that have a me <= closest_match * 1.1 - if me <= closest_match[0] * 1.1: - matches_within_10_pct.append(name) + # List with the top 3 closest matches (me) breaking ties by name + min_matches: list[str] = [name for _, name in terms_distance_list[:min_names]] - # returning the larger - # using + # Return whichever of the three lists is the longest, breaking ties + # lexicographically by the names within. 
return max( - [matches_within_2, matches_within_10_pct, matches_top_3], + [matches_within_atol, matches_within_rtol, min_matches], key=lambda x: (len(x), x), ) @@ -471,20 +477,36 @@ def min_edit_distance(s: str, t: str) -> float: return arr[previousRow, m] # Return the last computed row's last element - def name_mismatch_error(self, term_name: str) -> str: + def name_mismatch_error( + self, term_name: str, atol: int = 2, rtol: float = 0.1, min_names: int = 3 + ) -> str: """ Raises a name mismatch error with suggestions if possible. + Args: - term_name (str): The name of the term that caused the error. + `term_name`: The name of the term that caused the error. + `atol`: The absolute tolerance for the minimum edit distance when + determining whether to include a term as a suggestion; any term + names with a minimum edit distance less than or equal to + `closest_match + atol` will be included as a suggestion. + `rtol`: The relative tolerance for the minimum edit distance when + determining whether to include a term as a suggestion; any term + names with a minimum edit distance less than or equal to + `closest_match * (1 + rtol)` will be included as a suggestion. + `min_names`: The minimum number of suggestions to include. + + Returns: + A string describing the error, including suggestions if available. """ error_message: str = f"Unrecognized term of {self.to_string()}: {term_name!r}." - suggestions: list[str] = self.find_possible_name_matches(term_name=term_name) + suggestions: list[str] = self.find_possible_name_matches( + term_name=term_name, atol=atol, rtol=rtol, min_names=min_names + ) # Check if there are any suggestions to add if len(suggestions) > 0: suggestions_str: str = ", ".join(suggestions) error_message += f" Did you mean: {suggestions_str}?" 
- re.escape(error_message) return error_message diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index 0be884f0b..f16999388 100644 --- a/pydough/qdag/collections/global_context.py +++ b/pydough/qdag/collections/global_context.py @@ -7,6 +7,7 @@ __all__ = ["TableCollection"] +import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -115,8 +116,8 @@ def get_expression_position(self, expr_name: str) -> int: raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}") def get_term(self, term_name: str) -> PyDoughQDAG: - if term_name not in self.collections: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) return self.collections[term_name] diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index c2554b83f..7261c6116 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,6 +9,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -188,7 +189,7 @@ def get_term(self, term_name: str) -> PyDoughQDAG: elif term_name == self.child.name: return PartitionChild(self.child, self.child.name, self) else: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) + raise pydough.active_session.error_builder.term_not_found(self, term_name) def to_tree_form(self, is_last: bool) -> CollectionTreeForm: predecessor: CollectionTreeForm = self.ancestor_context.to_tree_form(is_last) diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index 138dbcf3e..607525522 100644 --- a/pydough/qdag/collections/partition_child.py +++ 
b/pydough/qdag/collections/partition_child.py @@ -8,7 +8,7 @@ from functools import cache -from pydough.errors import PyDoughQDAGException +import pydough from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, @@ -90,6 +90,9 @@ def inherited_downstreamed_terms(self) -> set[str]: @cache def get_term(self, term_name: str): + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found(self, term_name) + if term_name in self.ancestral_mapping: return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] @@ -104,9 +107,6 @@ def get_term(self, term_name: str): context = context.ancestor_context return Reference(context, term_name) - elif term_name not in self.all_terms: - raise PyDoughQDAGException(self.name_mismatch_error(term_name)) - return super().get_term(term_name) def is_singular(self, context: PyDoughCollectionQDAG) -> bool: From 097b8a0b80d5fd51bf7c3285bb0ca3e054d1be66 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 8 Jul 2025 11:28:29 -0400 Subject: [PATCH 04/97] Minor refactor to term_not_found error usage [RUN CI] --- pydough/qdag/collections/calculate.py | 4 +--- pydough/qdag/collections/collection_access.py | 4 +--- pydough/qdag/collections/collection_qdag.py | 18 ++++++++++++++++++ pydough/qdag/collections/global_context.py | 5 +---- pydough/qdag/collections/partition_by.py | 9 ++++----- pydough/qdag/collections/partition_child.py | 5 +---- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index 661e4f7db..b4d48664b 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -9,7 +9,6 @@ from functools import cache -import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -148,8 +147,7 @@ def get_expression_position(self, 
expr_name: str) -> int: return self.calc_term_indices[expr_name] def get_term(self, term_name: str) -> PyDoughQDAG: # type: ignore - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) + self.verify_term_exists(term_name) if term_name in self.calc_term_values: return self.calc_term_values[term_name] diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index b4dcd1bec..75f167e6e 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,7 +8,6 @@ from functools import cache -import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -108,8 +107,7 @@ def get_expression_position(self, expr_name: str) -> int: @cache def get_term(self, term_name: str) -> PyDoughQDAG: - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) + self.verify_term_exists(term_name) # Special handling of terms down-streamed from an ancestor CALCULATE # clause. diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index 3c8360a30..2c4bb7610 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -12,6 +12,7 @@ import numpy as np +import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions.collation_expression import CollationExpression @@ -510,3 +511,20 @@ def name_mismatch_error( error_message += f" Did you mean: {suggestions_str}?" return error_message + + def verify_term_exists(self, term_name: str) -> None: + """ + Verifies that a term exists in the collection, and raises an exception + if it does not. + + Args: + `term_name`: The name of the term to check whether it exists within + the collection. 
+ + Raises: + `PyDoughException` if the term does not exist in the collection. + """ + if term_name not in self.all_terms: + raise pydough.active_session.error_builder.term_not_found( + collection=self, term_name=term_name + ) diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index f16999388..af034810c 100644 --- a/pydough/qdag/collections/global_context.py +++ b/pydough/qdag/collections/global_context.py @@ -7,7 +7,6 @@ __all__ = ["TableCollection"] -import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -116,9 +115,7 @@ def get_expression_position(self, expr_name: str) -> int: raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}") def get_term(self, term_name: str) -> PyDoughQDAG: - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) - + self.verify_term_exists(term_name) return self.collections[term_name] @property diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index 7261c6116..0d315f213 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,7 +9,6 @@ from functools import cache -import pydough from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( @@ -183,13 +182,13 @@ def get_term(self, term_name: str) -> PyDoughQDAG: return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] ) - elif term_name in self._key_name_indices: - term: PartitionKey = self.keys[self._key_name_indices[term_name]] - return term elif term_name == self.child.name: return PartitionChild(self.child, self.child.name, self) else: - raise pydough.active_session.error_builder.term_not_found(self, term_name) + self.verify_term_exists(term_name) + assert term_name in self._key_name_indices 
+ term: PartitionKey = self.keys[self._key_name_indices[term_name]] + return term def to_tree_form(self, is_last: bool) -> CollectionTreeForm: predecessor: CollectionTreeForm = self.ancestor_context.to_tree_form(is_last) diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index 607525522..e10222ebf 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -8,7 +8,6 @@ from functools import cache -import pydough from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, @@ -90,9 +89,7 @@ def inherited_downstreamed_terms(self) -> set[str]: @cache def get_term(self, term_name: str): - if term_name not in self.all_terms: - raise pydough.active_session.error_builder.term_not_found(self, term_name) - + self.verify_term_exists(term_name) if term_name in self.ancestral_mapping: return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] From 18ba964e7cd5a09a8e4c9efb48009538a3607a26 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 12:38:02 -0400 Subject: [PATCH 05/97] WIP --- pydough/conversion/filter_pushdown.py | 8 +- pydough/conversion/merge_projects.py | 11 +- pydough/conversion/projection_pullup.py | 143 ++++++++++++++++++ pydough/conversion/relational_converter.py | 22 +++ pydough/relational/rel_util.py | 15 +- tests/test_plan_refsols/correl_27.txt | 14 +- tests/test_plan_refsols/correl_28.txt | 14 +- tests/test_plan_refsols/correl_30.txt | 8 +- .../parts_quantity_increase_95_96.txt | 22 ++- tests/test_plan_refsols/semi_aggregate.txt | 15 +- tests/test_plan_refsols/tpch_q20.txt | 8 +- tests/test_plan_refsols/tpch_q5.txt | 34 ++--- .../defog_dealership_adv11_ansi.sql | 18 +-- .../defog_dealership_adv11_sqlite.sql | 18 +-- .../defog_dealership_adv2_ansi.sql | 6 +- .../defog_dealership_adv2_sqlite.sql | 6 +- .../defog_ewallet_adv10_ansi.sql | 4 +- .../defog_ewallet_adv10_sqlite.sql | 4 +- 
.../defog_ewallet_adv16_ansi.sql | 4 +- .../defog_ewallet_adv16_sqlite.sql | 4 +- .../defog_ewallet_adv1_ansi.sql | 9 +- .../defog_ewallet_adv1_sqlite.sql | 9 +- .../defog_ewallet_adv3_ansi.sql | 4 +- .../defog_ewallet_adv3_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 8 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 8 +- tests/test_sql_refsols/tpch_q5_ansi.sql | 57 +++---- tests/test_sql_refsols/tpch_q5_sqlite.sql | 57 +++---- 28 files changed, 329 insertions(+), 205 deletions(-) create mode 100644 pydough/conversion/projection_pullup.py diff --git a/pydough/conversion/filter_pushdown.py b/pydough/conversion/filter_pushdown.py index d50d9da5d..cab872b55 100644 --- a/pydough/conversion/filter_pushdown.py +++ b/pydough/conversion/filter_pushdown.py @@ -48,17 +48,21 @@ def push_filters( """ remaining_filters: set[RelationalExpression] pushable_filters: set[RelationalExpression] + new_input: RelationalNode match node: case Filter(): # Add all of the conditions from the filters pushed down this far # with the filters from the current node. If there is a window # function, materialize all of them at this point, otherwise push # all of them further. 
+ filters = {transpose_expression(expr, node.columns) for expr in filters} filters.update(get_conjunctions(node.condition)) if contains_window(node.condition): - return build_filter(push_filters(node.input, set()), filters) + remaining_filters, pushable_filters = filters, set() else: - return push_filters(node.input, filters) + remaining_filters, pushable_filters = set(), filters + new_input = push_filters(node.input, pushable_filters) + return build_filter(new_input, remaining_filters, columns=node.columns) case Project(): if any(contains_window(expr) for expr in node.columns.values()): # If there is a window function, materialize all filters at diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index 363117af5..7a22b2cad 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -267,12 +267,15 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: return node -def merge_projects(node: RelationalNode) -> RelationalNode: +def merge_projects( + node: RelationalNode, push_into_joins: bool = True +) -> RelationalNode: """ Merge adjacent projections when beneficial. Args: `node`: The current node of the relational tree. + `push_into_joins`: If True, push projections into joins when possible. Returns: The transformed version of `node` with adjacent projections merged @@ -281,11 +284,13 @@ def merge_projects(node: RelationalNode) -> RelationalNode: """ # If there is a project on top of a join, attempt to push it down into the # inputs of the join. - if isinstance(node, Project) and isinstance(node.input, Join): + if isinstance(node, Project) and isinstance(node.input, Join) and push_into_joins: node = project_join_transpose(node) # Recursively invoke the procedure on all inputs to the node. 
- node = node.copy(inputs=[merge_projects(input) for input in node.inputs]) + node = node.copy( + inputs=[merge_projects(input, push_into_joins) for input in node.inputs] + ) # Invoke the main merging step if the current node is a root/projection, # potentially multiple times if the projection below it that gets deleted diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py new file mode 100644 index 000000000..88bc2d6ef --- /dev/null +++ b/pydough/conversion/projection_pullup.py @@ -0,0 +1,143 @@ +""" +Logic used to pull up projections in the relational plan so function calls +happen as late as possible, ideally after filters, filtering joins, and +aggregations. +""" + +__all__ = ["pullup_projections"] + + +from pydough.relational import ( + CallExpression, + ColumnReference, + Filter, + Join, + JoinType, + LiteralExpression, + Project, + RelationalExpression, + RelationalNode, + RelationalRoot, +) +from pydough.relational.rel_util import apply_substitution, contains_window +from pydough.relational.relational_expressions.column_reference_finder import ( + ColumnReferenceFinder, +) + +from .merge_projects import merge_adjacent_projects + + +def pull_non_columns(node: RelationalNode) -> RelationalNode: + """ + TODO + """ + new_node_columns: dict[str, RelationalExpression] = {} + new_project_columns: dict[str, RelationalExpression] = {} + needs_pull: bool = False + + for name, expr in node.columns.items(): + new_node_columns[name] = expr + match expr: + case ColumnReference(): + new_project_columns[name] = ColumnReference(name, expr.data_type) + case LiteralExpression() | CallExpression(): + new_project_columns[name] = expr + needs_pull = True + case _: + raise NotImplementedError( + f"Unsupported expression type {expr.__class__.__name__} in join columns." 
+ ) + + if not needs_pull: + return node + + new_input: RelationalNode = node.copy(columns=new_node_columns) + return Project(input=new_input, columns=new_project_columns) + + +def pull_project_into_join(node: Join, input_index: int) -> None: + """ + TODO + """ + if not isinstance(node.inputs[input_index], Project): + return + + +def pull_project_into_filter(node: Filter) -> None: + """ + TODO + """ + if not isinstance(node.input, Project): + return + + project: Project = node.input + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + node.condition.accept(finder) + condition_cols: set[ColumnReference] = finder.get_column_references() + condition_names: set[str] = {col.name for col in condition_cols} + + ref_expr: ColumnReference + new_ref: ColumnReference + new_project_columns: dict[str, RelationalExpression] = {} + used_cols: set[RelationalExpression] = set() + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = {} + for name, expr in project.columns.items(): + new_project_columns[name] = expr + used_cols.add(expr) + for name, expr in project.input.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + if name in condition_names: + continue + if ref_expr not in used_cols: + new_name: str = name + idx: int = 0 + while new_name in new_project_columns: + idx += 1 + new_name = f"{name}_{idx}" + new_ref = ColumnReference(name, expr.data_type) + new_project_columns[new_name] = new_ref + transfer_substitutions[ref_expr] = new_ref + + node._input = project.copy(columns=new_project_columns) + + cond_contains_window: bool = contains_window(node.condition) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + existing_outputs: set[RelationalExpression] = set(node.columns.values()) + new_filter_columns: dict[str, RelationalExpression] = {} + for name, expr in project.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + new_filter_columns[name] = expr + new_expr: 
RelationalExpression = apply_substitution( + expr, transfer_substitutions, {} + ) + if not (cond_contains_window and contains_window(new_expr)): + if name in condition_names: + if ref_expr not in existing_outputs: + substitutions[ref_expr] = new_expr + else: + new_filter_columns[name] = new_expr + node._condition = apply_substitution(node.condition, substitutions, {}) + node._columns = new_filter_columns + + +def pullup_projections(node: RelationalNode) -> RelationalNode: + """ + TODO + """ + # Recursively invoke the procedure on all inputs to the node. + node = node.copy(inputs=[pullup_projections(input) for input in node.inputs]) + match node: + case RelationalRoot() | Project(): + return merge_adjacent_projects(node) + case Join(): + pull_project_into_join(node, 0) + if node.join_type == JoinType.INNER: + pull_project_into_join(node, 1) + return pull_non_columns(node) + case Filter(): + # pull_project_into_filter(node) + return pull_non_columns(node) + case _: + return node diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index c990260c8..b2f0372a4 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1447,6 +1447,28 @@ def optimize_relational_tree( # Step 9: re-run projection merging. root = confirm_root(merge_projects(root)) + """ + # Step 6: bubble up names from the leaf nodes to further encourage simpler + # naming without aliases, and also to delete duplicate columns where + # possible. + root = bubble_column_names(root) + + # Step 7: run projection pullup. + root = confirm_root(pullup_projections(root)) + + # Step 8: prune unused columns. + root = ColumnPruner().prune_unused_columns(root) + + # Step 9: re-run filter pushdown + root._input = push_filters(root.input, set()) + + # Step 10: re-run projection merging, without pushing into joins. + root = confirm_root(merge_projects(root, push_into_joins=False)) + + # Step 11: re-run column pruning. 
+ root = ColumnPruner().prune_unused_columns(root) + """ + return root diff --git a/pydough/relational/rel_util.py b/pydough/relational/rel_util.py index b2681b181..c0423f91e 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -38,6 +38,7 @@ Filter, Join, JoinType, + Project, RelationalNode, ) @@ -244,7 +245,9 @@ def passthrough_column_mapping(node: RelationalNode) -> dict[str, RelationalExpr def build_filter( - node: RelationalNode, filters: set[RelationalExpression] + node: RelationalNode, + filters: set[RelationalExpression], + columns: dict[str, RelationalExpression] | None = None, ) -> RelationalNode: """ Build a filter node with the given filters on top of an input node. @@ -252,6 +255,9 @@ def build_filter( Args: `node`: The input node to build the filter on top of. `filters`: The set of filters to apply. + `columns`: An optional mapping of the column mapping to use on the + built filter node. If not provided, uses the passthrough column mapping + of `node`. Returns: A filter node with the given filters applied on top of `node`. If @@ -263,6 +269,9 @@ def build_filter( filters.discard(LiteralExpression(True, BooleanType())) condition: RelationalExpression if len(filters) == 0: + # If columns was provided, use it to create a Project node + if columns is not None: + return Project(node, columns) return node # Detect whether the filter can be pushed into a join condition. If so, @@ -301,7 +310,9 @@ def build_filter( # Otherwise, just return a new filter node with the new condition on top # of the existing node. 
- return Filter(node, condition, passthrough_column_mapping(node)) + if columns is None: + columns = passthrough_column_mapping(node) + return Filter(node, condition, columns) def transpose_expression( diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 2923d9a3c..1d43f55ec 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) - JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_selected_purchases': t0.n_selected_purchases, 'nation_name': t0.nation_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_selected_purchases': t0.n_selected_purchases}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_selected_purchases': 1:numeric, 'o_orderkey': o_orderkey}) +ROOT(columns=[('nation_name', anything_anything_n_name), 
('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) + JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git 
a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index ffeb0deba..8c226d189 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,10 +1,10 @@ -ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) - JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_selected_purchases': t0.n_selected_purchases, 'nation_name': t0.nation_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_selected_purchases': t0.n_selected_purchases}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_selected_purchases': 1:numeric, 'o_orderkey': o_orderkey}) +ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) + JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 8da8d15fa..46f53cd79 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', region_name), ('nation_name', nation_name), 
('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(region_name):asc_first, (nation_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_above_avg_customers': t0.n_above_avg_customers, 'n_above_avg_suppliers': t1.n_above_avg_suppliers, 'nation_name': t0.nation_name, 'region_name': t0.region_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_nationkey': ANYTHING(n_nationkey), 'n_above_avg_customers': COUNT(), 'nation_name': ANYTHING(n_name), 'region_name': ANYTHING(region_name)}) +ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': region_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t0.region_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t1.region_name}) @@ -15,7 +15,7 @@ ROOT(columns=[('region_name', 
region_name), ('nation_name', nation_name), ('n_ab FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_above_avg_suppliers': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index c20fcf841..391c0d4fb 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,21 +1,19 @@ ROOT(columns=[('name', p_name), ('qty_95', qty_95), ('qty_96', qty_96)], orderings=[(ordering_2):desc_last, (p_name):asc_first]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'ordering_2': ordering_2, 'p_name': p_name, 'qty_95': qty_95, 'qty_96': qty_96}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) - PROJECT(columns={'ordering_2': qty_96 - qty_95, 'p_name': p_name, 'qty_95': qty_95, 'qty_96': qty_96}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'qty_95': t0.qty_95, 'qty_96': t1.qty_96}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'qty_95': t1.qty_95}) + PROJECT(columns={'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'qty_95': DEFAULT_TO(sum_l_quantity, 0:numeric), 'qty_96': DEFAULT_TO(agg_1, 0:numeric)}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'l_partkey': l_partkey, 'qty_95': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - PROJECT(columns={'l_partkey': l_partkey, 'qty_96': DEFAULT_TO(sum_l_quantity, 0:numeric)}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) 
FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 001fda1d3..8b83d01e6 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', sum_price_of_10parts)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_price_of_10parts': t1.sum_price_of_10parts}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), 
('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - PROJECT(columns={'avg_p_retailprice': avg_p_retailprice, 'n_rows': n_rows, 'ps_suppkey': ps_suppkey, 'sum_price_of_10parts': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index d179a91c2..f025d4504 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ 
b/tests/test_plan_refsols/tpch_q20.txt @@ -7,13 +7,13 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(agg_0, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'part_qty': t1.part_qty, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'part_qty': t1.part_qty}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'agg_0': DEFAULT_TO(sum_l_quantity, 0:numeric), 'l_partkey': l_partkey}) + PROJECT(columns={'l_partkey': l_partkey, 'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': 
l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index afb898a11..a6f56996d 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,21 +1,19 @@ -ROOT(columns=[('N_NAME', anything_anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_sum_sum_value, 0:numeric), 'anything_anything_n_name': anything_anything_n_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'sum_sum_sum_value': SUM(sum_sum_value)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'n_nationkey': t0.n_nationkey, 'sum_sum_value': t0.sum_sum_value}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_sum_value': SUM(sum_value)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'sum_value': t1.sum_value}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, 
columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'value': l_extendedprice * 1:numeric - l_discount}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) +ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t0.value}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t1.value}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'value': l_extendedprice * 1:numeric - l_discount}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql index 2f90c30b7..649382f8c 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql @@ -1,19 +1,11 @@ -WITH _s0 AS ( - SELECT - SUM(sale_price) AS sum_sale_price, - car_id - FROM main.sales - WHERE - EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 - GROUP BY - car_id -) SELECT ( ( - COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + COALESCE(SUM(sales.sale_price), 0) - COALESCE(SUM(cars.cost), 0) ) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _s0 AS _s0 +FROM 
main.sales AS sales JOIN main.cars AS cars - ON _s0.car_id = cars._id + ON cars._id = sales.car_id +WHERE + EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 diff --git a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql index 3a7f5ebec..95c7af48e 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql @@ -1,19 +1,11 @@ -WITH _s0 AS ( - SELECT - SUM(sale_price) AS sum_sale_price, - car_id - FROM main.sales - WHERE - CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 - GROUP BY - car_id -) SELECT ( CAST(( - COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + COALESCE(SUM(sales.sale_price), 0) - COALESCE(SUM(cars.cost), 0) ) AS REAL) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _s0 AS _s0 +FROM main.sales AS sales JOIN main.cars AS cars - ON _s0.car_id = cars._id + ON cars._id = sales.car_id +WHERE + CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 diff --git a/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql index 59cbeb190..776eae11f 100644 --- a/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv2_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS num_sales, + COUNT(*) AS n_rows, salesperson_id FROM main.sales WHERE @@ -12,9 +12,9 @@ SELECT salespersons._id, salespersons.first_name, salespersons.last_name, - _s1.num_sales + _s1.n_rows AS num_sales FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - num_sales DESC + _s1.n_rows DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql index 71037cdc7..2f836e759 100644 --- a/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv2_sqlite.sql 
@@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS num_sales, + COUNT(*) AS n_rows, salesperson_id FROM main.sales WHERE @@ -14,9 +14,9 @@ SELECT salespersons._id, salespersons.first_name, salespersons.last_name, - _s1.num_sales + _s1.n_rows AS num_sales FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - num_sales DESC + _s1.n_rows DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql index aab001a76..38f66aaba 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_transactions, + COUNT(*) AS n_rows, sender_id FROM main.wallet_transactions_daily WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.uid AS user_id, - _s1.total_transactions + _s1.n_rows AS total_transactions FROM main.users AS users JOIN _s1 AS _s1 ON _s1.sender_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql index aab001a76..38f66aaba 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv10_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_transactions, + COUNT(*) AS n_rows, sender_id FROM main.wallet_transactions_daily WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.uid AS user_id, - _s1.total_transactions + _s1.n_rows AS total_transactions FROM main.users AS users JOIN _s1 AS _s1 ON _s1.sender_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql index 5dec70a8c..9f171f00c 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_unread_notifs, + COUNT(*) AS n_rows, user_id FROM 
main.notifications WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.username, - _s1.total_unread_notifs + _s1.n_rows AS total_unread_notifs FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql index 5dec70a8c..9f171f00c 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv16_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_unread_notifs, + COUNT(*) AS n_rows, user_id FROM main.notifications WHERE @@ -10,7 +10,7 @@ WITH _s1 AS ( ) SELECT users.username, - _s1.total_unread_notifs + _s1.n_rows AS total_unread_notifs FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql index cea476af4..05ebb109f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_ansi.sql @@ -1,8 +1,7 @@ WITH _s1 AS ( SELECT - ( - COUNT(DISTINCT coupon_id) * 1.0 - ) / COUNT(DISTINCT txid) AS cpur, + COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, + COUNT(DISTINCT txid) AS ndistinct_txid, receiver_id FROM main.wallet_transactions_daily WHERE @@ -12,7 +11,9 @@ WITH _s1 AS ( ) SELECT merchants.name, - _s1.cpur AS CPUR + ( + _s1.ndistinct_coupon_id * 1.0 + ) / _s1.ndistinct_txid AS CPUR FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql index d57bb5e21..c4b8b97ad 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv1_sqlite.sql @@ -1,8 +1,7 @@ WITH _s1 AS ( SELECT - CAST(( - COUNT(DISTINCT coupon_id) * 1.0 - ) AS REAL) / COUNT(DISTINCT txid) AS cpur, + COUNT(DISTINCT coupon_id) AS 
ndistinct_coupon_id, + COUNT(DISTINCT txid) AS ndistinct_txid, receiver_id FROM main.wallet_transactions_daily WHERE @@ -12,7 +11,9 @@ WITH _s1 AS ( ) SELECT merchants.name, - _s1.cpur AS CPUR + CAST(( + _s1.ndistinct_coupon_id * 1.0 + ) AS REAL) / _s1.ndistinct_txid AS CPUR FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql index 0b4530b3c..2806ddea0 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_coupons, + COUNT(*) AS n_rows, merchant_id FROM main.coupons GROUP BY @@ -8,7 +8,7 @@ WITH _s1 AS ( ) SELECT merchants.name AS merchant_name, - _s1.total_coupons + _s1.n_rows AS total_coupons FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql index 0b4530b3c..2806ddea0 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv3_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS total_coupons, + COUNT(*) AS n_rows, merchant_id FROM main.coupons GROUP BY @@ -8,7 +8,7 @@ WITH _s1 AS ( ) SELECT merchants.name AS merchant_name, - _s1.total_coupons + _s1.n_rows AS total_coupons FROM main.merchants AS merchants JOIN _s1 AS _s1 ON _s1.merchant_id = merchants.mid diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 4bc325b88..6ee033739 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS agg_0, + COALESCE(SUM(l_quantity), 0) AS part_qty, l_partkey FROM tpch.lineitem WHERE @@ -9,8 +9,8 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( 
SELECT - _s3.agg_0, - part.p_partkey + part.p_partkey, + _s3.part_qty FROM tpch.part AS part JOIN _s3 AS _s3 ON _s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.agg_0, 0) + 0.5 * COALESCE(_s5.part_qty, 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index a5665d63f..ff41af883 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS agg_0, + COALESCE(SUM(l_quantity), 0) AS part_qty, l_partkey FROM tpch.lineitem WHERE @@ -9,8 +9,8 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - _s3.agg_0, - part.p_partkey + part.p_partkey, + _s3.part_qty FROM tpch.part AS part JOIN _s3 AS _s3 ON _s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.agg_0, 0) + 0.5 * COALESCE(_s5.part_qty, 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q5_ansi.sql b/tests/test_sql_refsols/tpch_q5_ansi.sql index ede41e1ca..200918b17 100644 --- a/tests/test_sql_refsols/tpch_q5_ansi.sql +++ b/tests/test_sql_refsols/tpch_q5_ansi.sql @@ -1,37 +1,4 @@ -WITH _s7 AS ( - SELECT - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_value, - l_orderkey, - l_suppkey - FROM tpch.lineitem - GROUP BY - l_orderkey, - l_suppkey -), _s10 AS ( - SELECT - ANY_VALUE(nation.n_name) AS anything_n_name, - SUM(_s7.sum_value) AS sum_sum_value, - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey - FROM tpch.nation AS nation - JOIN tpch.region AS region - ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - JOIN tpch.orders AS orders - ON customer.c_custkey = 
orders.o_custkey - AND orders.o_orderdate < CAST('1995-01-01' AS DATE) - AND orders.o_orderdate >= CAST('1994-01-01' AS DATE) - JOIN _s7 AS _s7 - ON _s7.l_orderkey = orders.o_orderkey - GROUP BY - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey -), _s11 AS ( +WITH _s11 AS ( SELECT nation.n_name, supplier.s_suppkey @@ -40,12 +7,24 @@ WITH _s7 AS ( ON nation.n_nationkey = supplier.s_nationkey ) SELECT - ANY_VALUE(_s10.anything_n_name) AS N_NAME, - COALESCE(SUM(_s10.sum_sum_value), 0) AS REVENUE -FROM _s10 AS _s10 + ANY_VALUE(nation.n_name) AS N_NAME, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + AND orders.o_orderdate < CAST('1995-01-01' AS DATE) + AND orders.o_orderdate >= CAST('1994-01-01' AS DATE) +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey JOIN _s11 AS _s11 - ON _s10.l_suppkey = _s11.s_suppkey AND _s10.n_name = _s11.n_name + ON _s11.n_name = nation.n_name AND _s11.s_suppkey = lineitem.l_suppkey GROUP BY - _s10.n_nationkey + nation.n_nationkey ORDER BY revenue DESC diff --git a/tests/test_sql_refsols/tpch_q5_sqlite.sql b/tests/test_sql_refsols/tpch_q5_sqlite.sql index 1bae1b7b7..d65176b51 100644 --- a/tests/test_sql_refsols/tpch_q5_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q5_sqlite.sql @@ -1,37 +1,4 @@ -WITH _s7 AS ( - SELECT - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_value, - l_orderkey, - l_suppkey - FROM tpch.lineitem - GROUP BY - l_orderkey, - l_suppkey -), _s10 AS ( - SELECT - MAX(nation.n_name) AS anything_n_name, - SUM(_s7.sum_value) AS sum_sum_value, - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey - FROM tpch.nation AS nation - JOIN tpch.region AS region - ON nation.n_regionkey 
= region.r_regionkey AND region.r_name = 'ASIA' - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - JOIN tpch.orders AS orders - ON customer.c_custkey = orders.o_custkey - AND orders.o_orderdate < '1995-01-01' - AND orders.o_orderdate >= '1994-01-01' - JOIN _s7 AS _s7 - ON _s7.l_orderkey = orders.o_orderkey - GROUP BY - _s7.l_suppkey, - nation.n_name, - nation.n_nationkey -), _s11 AS ( +WITH _s11 AS ( SELECT nation.n_name, supplier.s_suppkey @@ -40,12 +7,24 @@ WITH _s7 AS ( ON nation.n_nationkey = supplier.s_nationkey ) SELECT - MAX(_s10.anything_n_name) AS N_NAME, - COALESCE(SUM(_s10.sum_sum_value), 0) AS REVENUE -FROM _s10 AS _s10 + MAX(nation.n_name) AS N_NAME, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + AND orders.o_orderdate < '1995-01-01' + AND orders.o_orderdate >= '1994-01-01' +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey JOIN _s11 AS _s11 - ON _s10.l_suppkey = _s11.s_suppkey AND _s10.n_name = _s11.n_name + ON _s11.n_name = nation.n_name AND _s11.s_suppkey = lineitem.l_suppkey GROUP BY - _s10.n_nationkey + nation.n_nationkey ORDER BY revenue DESC From 684b5d74052ca9114bc4e2d1cc92cd654cec9c36 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 13:02:51 -0400 Subject: [PATCH 06/97] WIP improvements on projection pullup --- pydough/conversion/projection_pullup.py | 17 +++--- pydough/conversion/relational_converter.py | 21 ++------ tests/test_plan_refsols/aggregate_anti.txt | 4 +- tests/test_plan_refsols/anti_aggregate.txt | 4 +- .../anti_aggregate_alternate.txt | 4 +- tests/test_plan_refsols/anti_singular.txt | 4 +- tests/test_plan_refsols/common_prefix_ad.txt | 14 
++--- tests/test_plan_refsols/common_prefix_ak.txt | 2 +- tests/test_plan_refsols/correl_10.txt | 11 ++-- tests/test_plan_refsols/correl_20.txt | 26 +++++---- tests/test_plan_refsols/correl_34.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 4 +- .../epoch_culture_events_info.txt | 6 +-- .../month_year_sliding_windows.txt | 15 +++--- .../multi_partition_access_6.txt | 53 +++++++++---------- tests/test_plan_refsols/singular_anti.txt | 4 +- .../technograph_monthly_incident_rate.txt | 10 ++-- tests/test_plan_refsols/tpch_q18.txt | 10 ++-- tests/test_plan_refsols/tpch_q22.txt | 6 +-- .../window_filter_order_10.txt | 4 +- ...technograph_monthly_incident_rate_ansi.sql | 26 ++++----- ...chnograph_monthly_incident_rate_sqlite.sql | 26 ++++----- tests/test_sql_refsols/tpch_q18_ansi.sql | 12 ++--- tests/test_sql_refsols/tpch_q18_sqlite.sql | 12 ++--- 24 files changed, 138 insertions(+), 159 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 88bc2d6ef..460a7e5fb 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -10,6 +10,7 @@ from pydough.relational import ( CallExpression, ColumnReference, + CorrelatedReference, Filter, Join, JoinType, @@ -18,6 +19,7 @@ RelationalExpression, RelationalNode, RelationalRoot, + WindowCallExpression, ) from pydough.relational.rel_util import apply_substitution, contains_window from pydough.relational.relational_expressions.column_reference_finder import ( @@ -38,14 +40,14 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: for name, expr in node.columns.items(): new_node_columns[name] = expr match expr: - case ColumnReference(): + case ColumnReference() | CorrelatedReference(): new_project_columns[name] = ColumnReference(name, expr.data_type) - case LiteralExpression() | CallExpression(): + case LiteralExpression() | CallExpression() | WindowCallExpression(): new_project_columns[name] = expr needs_pull = True case _: 
raise NotImplementedError( - f"Unsupported expression type {expr.__class__.__name__} in join columns." + f"Unsupported expression type {expr.__class__.__name__} in `pull_non_columns` columns." ) if not needs_pull: @@ -112,14 +114,15 @@ def pull_project_into_filter(node: Filter) -> None: new_expr: RelationalExpression = apply_substitution( expr, transfer_substitutions, {} ) - if not (cond_contains_window and contains_window(new_expr)): + expr_contains_window: bool = contains_window(new_expr) + if not (cond_contains_window and expr_contains_window): if name in condition_names: if ref_expr not in existing_outputs: substitutions[ref_expr] = new_expr - else: + elif not expr_contains_window: new_filter_columns[name] = new_expr node._condition = apply_substitution(node.condition, substitutions, {}) - node._columns = new_filter_columns + # node._columns = new_filter_columns def pullup_projections(node: RelationalNode) -> RelationalNode: @@ -137,7 +140,7 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: pull_project_into_join(node, 1) return pull_non_columns(node) case Filter(): - # pull_project_into_filter(node) + pull_project_into_filter(node) return pull_non_columns(node) case _: return node diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index b2f0372a4..7c039253d 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -83,6 +83,7 @@ from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree from .merge_projects import merge_projects +from .projection_pullup import pullup_projections @dataclass @@ -1433,7 +1434,7 @@ def optimize_relational_tree( # Step 5: re-run projection merging. root = confirm_root(merge_projects(root)) - # Step 6: prune unused columns. + # Step 6: re-run column pruning. 
root = ColumnPruner().prune_unused_columns(root) # Step 7: bubble up names from the leaf nodes to further encourage simpler @@ -1441,24 +1442,9 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: re-run column pruning. - root = ColumnPruner().prune_unused_columns(root) - - # Step 9: re-run projection merging. - root = confirm_root(merge_projects(root)) - - """ - # Step 6: bubble up names from the leaf nodes to further encourage simpler - # naming without aliases, and also to delete duplicate columns where - # possible. - root = bubble_column_names(root) - - # Step 7: run projection pullup. + # Step 8: run projection pullup. root = confirm_root(pullup_projections(root)) - # Step 8: prune unused columns. - root = ColumnPruner().prune_unused_columns(root) - # Step 9: re-run filter pushdown root._input = push_filters(root.input, set()) @@ -1467,7 +1453,6 @@ def optimize_relational_tree( # Step 11: re-run column pruning. root = ColumnPruner().prune_unused_columns(root) - """ return root diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index b5bba6374..e1482682d 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('avg_price_of_10parts', NULL_2), ('sum_price_of_10parts', DEFAULT_TO(NULL_2, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_2': None:unknown, 's_name': t0.s_name}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index b5bba6374..e1482682d 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('avg_price_of_10parts', NULL_2), ('sum_price_of_10parts', DEFAULT_TO(NULL_2, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_2': None:unknown, 's_name': t0.s_name}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index 4e01b49b6..af1852c80 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('avg_price_of_10parts', DEFAULT_TO(NULL_2, 0:numeric)), ('sum_price_of_10parts', NULL_2)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, 
columns={'NULL_2': None:unknown, 's_name': t0.s_name}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('sum_price_of_10parts', None:unknown)], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_singular.txt b/tests/test_plan_refsols/anti_singular.txt index b432cef13..a66ce09a4 100644 --- a/tests/test_plan_refsols/anti_singular.txt +++ b/tests/test_plan_refsols/anti_singular.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', n_name), ('region_name', NULL_1)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_1': None:unknown, 'n_name': t0.n_name}) +ROOT(columns=[('name', n_name), ('region_name', None:unknown)], orderings=[]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 31d071505..00ebd7afd 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -4,13 +4,13 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_ SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 
's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'qty_shipped': t1.qty_shipped}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 905b6bb93..694228447 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines)], orderings=[(anything_n_name):asc_first]) PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - FILTER(condition=sum_sum_n_rows > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, 
columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index 217a786c0..a2954ac48 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,6 +1,7 @@ ROOT(columns=[('name', n_name), ('rname', NULL_4)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_4': None:unknown, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'NULL_4': None:unknown, 'n_name': n_name}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=SLICE(t1.r_name, 
None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 8c61c789f..a480ec3e3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,16 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=domestic, columns={}) - PROJECT(columns={'domestic': name_16 == n_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'name_16': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 9a051c870..4ee636ab6 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -13,7 +13,7 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 
'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=l_returnflag == 'N':string & l_linestatus == 'F':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + FILTER(condition=l_linestatus == 'F':string & l_returnflag == 'N':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_7.txt b/tests/test_plan_refsols/correl_7.txt index 1a0fbcdc7..6ba1a011d 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', r_name), ('n_prefix_nations', DEFAULT_TO(NULL_4, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_4': None:unknown, 'r_name': t0.r_name}) +ROOT(columns=[('name', r_name), ('n_prefix_nations', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & 
t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 2b98c54a1..be5415490 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -2,11 +2,11 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', eve LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 'event_year': t0.event_year, 's_name': t0.s_name, 't_name': t1.t_name}) JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 'event_year': t0.event_year, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 'event_year': t0.event_year}) - PROJECT(columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'event_year': YEAR(ev_dt)}) + PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'event_year': YEAR(ev_dt)}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': 
ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index b4da40d64..c9e01e26a 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,14 +1,13 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'month_total_spent': t1.month_total_spent, 'year': t1.year}) - FILTER(condition=curr_year_total_spent > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'curr_year_total_spent': DEFAULT_TO(sum_month_total_spent, 0:numeric), 'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'year': year}) - AGGREGATE(keys={'year': year}, 
aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), columns={'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 05b282309..5ee7193e5 100644 --- 
a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,26 +2,25 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_cust_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - PROJECT(columns={'n_cust_trans': DEFAULT_TO(sum_n_cust_type_trans, 0:numeric), 'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, 
aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + 
SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -41,21 +40,19 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) - PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_ticker_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + 
FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/singular_anti.txt b/tests/test_plan_refsols/singular_anti.txt index 8bc55d80a..de7e0a463 100644 --- a/tests/test_plan_refsols/singular_anti.txt +++ b/tests/test_plan_refsols/singular_anti.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('nation_name', n_name), ('region_name', NULL_1)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_1': None:unknown, 'n_name': t0.n_name}) +ROOT(columns=[('nation_name', n_name), ('region_name', None:unknown)], orderings=[]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index b73085185..5912c5390 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ 
b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -10,9 +10,8 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) @@ -20,9 +19,8 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) 
+ SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index c55be44f7..fc34417b8 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,10 +1,10 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'TOTAL_QUANTITY': t1.TOTAL_QUANTITY, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': 
o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'TOTAL_QUANTITY': t1.TOTAL_QUANTITY, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'l_orderkey': l_orderkey}) PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'l_orderkey': l_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 6195aa0a0..06077eb2c 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -8,10 +8,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - 
FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - PROJECT(columns={'c_acctbal': c_acctbal, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 997dfa722..28100ecde 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[NULL_1], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'NULL_1': None:unknown, 'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': 
o_custkey, 'o_totalprice': o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index f824db756..3aca1a585 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -4,7 +4,7 @@ WITH _t4 AS ( FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t8 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t4 AS _t7 + _s0.ca_dt + FROM _t4 AS _s0 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t7.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_s0.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t11.ca_dt - FROM _t4 AS _t11 + _s8.ca_dt + FROM _t4 AS _s8 JOIN main.incidents AS incidents - ON _t11.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t12 - ON _t12.co_id = devices.de_production_country_id + JOIN _t6 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t11.ca_dt + _s8.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index 4d1842b88..b9ae32845 100644 --- 
a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -4,7 +4,7 @@ WITH _t4 AS ( FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t8 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t4 AS _t7 + _s0.ca_dt + FROM _t4 AS _s0 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t7.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_s0.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t11.ca_dt - FROM _t4 AS _t11 + _s8.ca_dt + FROM _t4 AS _s8 JOIN main.incidents AS incidents - ON _t11.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _s8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t12 - ON _t12.co_id = devices.de_production_country_id + JOIN _t6 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t11.ca_dt + _s8.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/tpch_q18_ansi.sql b/tests/test_sql_refsols/tpch_q18_ansi.sql index c75c107e0..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_ansi.sql +++ b/tests/test_sql_refsols/tpch_q18_ansi.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_t0.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON 
customer.c_custkey = orders.o_custkey -JOIN _t0 AS _t0 - ON NOT _t0.sum_l_quantity IS NULL - AND _t0.l_orderkey = orders.o_orderkey - AND _t0.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q18_sqlite.sql b/tests/test_sql_refsols/tpch_q18_sqlite.sql index c75c107e0..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q18_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_t0.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _t0 AS _t0 - ON NOT _t0.sum_l_quantity IS NULL - AND _t0.l_orderkey = orders.o_orderkey - AND _t0.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate From cc004ecbb4bfe17f1736733f6dfafb8a088c4dec Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 14:50:34 -0400 Subject: [PATCH 07/97] Fixing filter/join cases --- pydough/conversion/projection_pullup.py | 66 +++++++++++-------- tests/test_plan_refsols/bad_child_reuse_2.txt | 17 ++--- tests/test_plan_refsols/bad_child_reuse_3.txt | 17 ++--- tests/test_plan_refsols/common_prefix_n.txt | 49 +++++++------- tests/test_plan_refsols/common_prefix_o.txt | 53 +++++++-------- tests/test_plan_refsols/correl_24.txt | 14 ++-- .../month_year_sliding_windows.txt | 15 +++-- .../technograph_monthly_incident_rate.txt | 7 +- ..._year_cumulative_incident_rate_overall.txt | 27 ++++---- 
...technograph_monthly_incident_rate_ansi.sql | 34 +++++----- ...chnograph_monthly_incident_rate_sqlite.sql | 34 +++++----- ..._cumulative_incident_rate_overall_ansi.sql | 20 +++--- ...umulative_incident_rate_overall_sqlite.sql | 20 +++--- 13 files changed, 198 insertions(+), 175 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 460a7e5fb..f211f4d0d 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -8,18 +8,14 @@ from pydough.relational import ( - CallExpression, ColumnReference, - CorrelatedReference, Filter, Join, JoinType, - LiteralExpression, Project, RelationalExpression, RelationalNode, RelationalRoot, - WindowCallExpression, ) from pydough.relational.rel_util import apply_substitution, contains_window from pydough.relational.relational_expressions.column_reference_finder import ( @@ -39,20 +35,35 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: for name, expr in node.columns.items(): new_node_columns[name] = expr - match expr: - case ColumnReference() | CorrelatedReference(): - new_project_columns[name] = ColumnReference(name, expr.data_type) - case LiteralExpression() | CallExpression() | WindowCallExpression(): - new_project_columns[name] = expr - needs_pull = True - case _: - raise NotImplementedError( - f"Unsupported expression type {expr.__class__.__name__} in `pull_non_columns` columns." 
- ) + if isinstance(expr, ColumnReference): + new_project_columns[name] = ColumnReference(name, expr.data_type) + else: + new_project_columns[name] = expr + needs_pull = True if not needs_pull: return node + existing_vals: set[RelationalExpression] = set(new_node_columns.values()) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + for input_idx in range(len(node.inputs)): + input_node: RelationalNode = node.inputs[input_idx] + for name, expr in input_node.columns.items(): + ref_expr: ColumnReference = ColumnReference( + name, expr.data_type, input_name=node.default_input_aliases[input_idx] + ) + if expr not in existing_vals: + new_name: str = name + idx: int = 0 + while new_name in new_node_columns: + idx += 1 + new_name = f"{name}_{idx}" + new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) + new_node_columns[new_name] = ref_expr + substitutions[ref_expr] = new_ref + for name, expr in new_project_columns.items(): + new_project_columns[name] = apply_substitution(expr, substitutions, {}) + new_input: RelationalNode = node.copy(columns=new_node_columns) return Project(input=new_input, columns=new_project_columns) @@ -79,6 +90,11 @@ def pull_project_into_filter(node: Filter) -> None: node.condition.accept(finder) condition_cols: set[ColumnReference] = finder.get_column_references() condition_names: set[str] = {col.name for col in condition_cols} + finder.reset() + for expr in node.columns.values(): + expr.accept(finder) + output_cols: set[ColumnReference] = finder.get_column_references() + output_names: set[str] = {col.name for col in output_cols} ref_expr: ColumnReference new_ref: ColumnReference @@ -104,25 +120,23 @@ def pull_project_into_filter(node: Filter) -> None: node._input = project.copy(columns=new_project_columns) - cond_contains_window: bool = contains_window(node.condition) substitutions: dict[RelationalExpression, RelationalExpression] = {} - existing_outputs: set[RelationalExpression] = set(node.columns.values()) - 
new_filter_columns: dict[str, RelationalExpression] = {} for name, expr in project.columns.items(): ref_expr = ColumnReference(name, expr.data_type) - new_filter_columns[name] = expr new_expr: RelationalExpression = apply_substitution( expr, transfer_substitutions, {} ) - expr_contains_window: bool = contains_window(new_expr) - if not (cond_contains_window and expr_contains_window): - if name in condition_names: - if ref_expr not in existing_outputs: - substitutions[ref_expr] = new_expr - elif not expr_contains_window: - new_filter_columns[name] = new_expr + if (not contains_window(new_expr)) and ( + (name in condition_names) != (name in output_names) + ): + substitutions[ref_expr] = apply_substitution( + expr, transfer_substitutions, {} + ) node._condition = apply_substitution(node.condition, substitutions, {}) - # node._columns = new_filter_columns + node._columns = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.columns.items() + } def pullup_projections(node: RelationalNode) -> RelationalNode: diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 128fe3cf1..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,10 +1,11 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 128fe3cf1..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ 
b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,10 +1,11 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 
'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 71e91f37d..01c51b8e2 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,26 +1,27 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) + 
PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + 
FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 907f12e2a..0e7f831a2 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,28 +1,29 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', 
n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, 
columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': 
t0.sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index af102dee8..1217865d6 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -4,9 +4,11 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': 
avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index c9e01e26a..a0098dd14 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,13 
+1,14 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'month_total_spent': t1.month_total_spent, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), columns={'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': 
year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 5912c5390..ab43e7a9a 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -3,9 +3,10 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 
'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index b34bf2502..e333d030c 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,17 +1,18 @@ ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought_change), ('pct_incident_change', pct_incident_change), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) PROJECT(columns={'cum_ir': ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric), 'n_devices': n_devices, 'n_incidents': n_incidents, 'pct_bought_change': ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric), 'pct_incident_change': ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric), 'year': year}) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'n_incidents': n_incidents, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 
0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + PROJECT(columns={'n_devices': n_devices, 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) + FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) + PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + 
JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 3aca1a585..593db1ec1 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t5 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t6 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -15,37 +15,37 @@ WITH _t4 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t4 AS _s0 + FROM _t5 AS _s0 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATE_ADD(CAST(_s0.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 + ON _t7.co_id = devices.de_production_country_id GROUP BY _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t4 AS _s8 + FROM _t5 AS _s8 JOIN main.incidents AS incidents ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t8 - 
ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY _s8.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t4 AS _t4 +FROM _t5 AS _t5 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t5.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t5.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index b9ae32845..581509700 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t5 AS ( SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t6 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -15,37 +15,37 @@ WITH _t4 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t4 AS _s0 + FROM _t5 AS _s0 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATETIME(_s0.ca_dt, '-6 month') JOIN main.devices AS devices ON 
calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 + ON _t7.co_id = devices.de_production_country_id GROUP BY _s0.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t4 AS _s8 + FROM _t5 AS _s8 JOIN main.incidents AS incidents ON _s8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY _s8.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t4 AS _t4 +FROM _t5 AS _t5 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t5.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t5.ca_dt GROUP BY - CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER) + CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index 85a844a7c..a296d1e1b 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ 
b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t5 AS _s0 + FROM _t6 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) GROUP BY @@ -15,23 +15,23 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t5 AS _s4 + FROM _t6 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY _s4.ca_dt -), _t3 AS ( +), _t4 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) AS year - FROM _t5 AS _t5 + EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) AS year + FROM _t6 AS _t6 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t5.ca_dt + ON _s3.ca_dt = _t6.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t6.ca_dt GROUP BY - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) + EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) ), _t0 AS ( SELECT ROUND( @@ -57,7 +57,7 @@ WITH _t5 AS ( COALESCE(sum_expr_3, 0) AS n_devices, COALESCE(sum_n_rows, 0) AS n_incidents, year - FROM _t3 + FROM _t4 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 8003d941c..783a358a4 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t5 AS _s0 + FROM _t6 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE(devices.de_purchase_ts, 'start of 
day') GROUP BY @@ -15,23 +15,23 @@ WITH _t5 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t5 AS _s4 + FROM _t6 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY _s4.ca_dt -), _t3 AS ( +), _t4 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) AS year - FROM _t5 AS _t5 + CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) AS year + FROM _t6 AS _t6 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t5.ca_dt + ON _s3.ca_dt = _t6.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t6.ca_dt GROUP BY - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) ), _t0 AS ( SELECT ROUND( @@ -57,7 +57,7 @@ WITH _t5 AS ( COALESCE(sum_expr_3, 0) AS n_devices, COALESCE(sum_n_rows, 0) AS n_incidents, year - FROM _t3 + FROM _t4 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) From de7d4e66f0b0fafbdfe34ffbeaac4ae9b548f213 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 9 Jul 2025 15:53:56 -0400 Subject: [PATCH 08/97] Bugfixes, testing for correctness [RUN CI] --- pydough/conversion/projection_pullup.py | 80 ++++++++----------- pydough/relational/rel_util.py | 2 + tests/test_plan_refsols/bad_child_reuse_2.txt | 6 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 6 +- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_o.txt | 10 +-- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/nation_best_order.txt | 4 +- .../time_threshold_reached.txt | 4 +- .../window_filter_order_10.txt | 13 +-- .../time_threshold_reached_ansi.sql | 6 +- .../time_threshold_reached_sqlite.sql | 6 +- 13 files changed, 70 insertions(+), 79 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index f211f4d0d..07fd199d8 100644 --- a/pydough/conversion/projection_pullup.py 
+++ b/pydough/conversion/projection_pullup.py @@ -25,26 +25,13 @@ from .merge_projects import merge_adjacent_projects -def pull_non_columns(node: RelationalNode) -> RelationalNode: +def widen_columns( + node: RelationalNode, +) -> dict[RelationalExpression, RelationalExpression]: """ TODO """ - new_node_columns: dict[str, RelationalExpression] = {} - new_project_columns: dict[str, RelationalExpression] = {} - needs_pull: bool = False - - for name, expr in node.columns.items(): - new_node_columns[name] = expr - if isinstance(expr, ColumnReference): - new_project_columns[name] = ColumnReference(name, expr.data_type) - else: - new_project_columns[name] = expr - needs_pull = True - - if not needs_pull: - return node - - existing_vals: set[RelationalExpression] = set(new_node_columns.values()) + existing_vals: set[RelationalExpression] = set(node.columns.values()) substitutions: dict[RelationalExpression, RelationalExpression] = {} for input_idx in range(len(node.inputs)): input_node: RelationalNode = node.inputs[input_idx] @@ -55,17 +42,39 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: if expr not in existing_vals: new_name: str = name idx: int = 0 - while new_name in new_node_columns: + while new_name in node.columns: idx += 1 new_name = f"{name}_{idx}" new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) - new_node_columns[new_name] = ref_expr + node.columns[new_name] = ref_expr substitutions[ref_expr] = new_ref + return substitutions + + +def pull_non_columns(node: RelationalNode) -> RelationalNode: + """ + TODO + """ + new_project_columns: dict[str, RelationalExpression] = {} + needs_pull: bool = False + + for name, expr in node.columns.items(): + if isinstance(expr, ColumnReference): + new_project_columns[name] = ColumnReference(name, expr.data_type) + else: + new_project_columns[name] = expr + needs_pull = True + + if not needs_pull: + return node + + substitutions: dict[RelationalExpression, RelationalExpression] = 
widen_columns( + node + ) for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) - new_input: RelationalNode = node.copy(columns=new_node_columns) - return Project(input=new_input, columns=new_project_columns) + return Project(input=node, columns=new_project_columns) def pull_project_into_join(node: Join, input_index: int) -> None: @@ -96,39 +105,18 @@ def pull_project_into_filter(node: Filter) -> None: output_cols: set[ColumnReference] = finder.get_column_references() output_names: set[str] = {col.name for col in output_cols} - ref_expr: ColumnReference - new_ref: ColumnReference - new_project_columns: dict[str, RelationalExpression] = {} - used_cols: set[RelationalExpression] = set() - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = {} - for name, expr in project.columns.items(): - new_project_columns[name] = expr - used_cols.add(expr) - for name, expr in project.input.columns.items(): - ref_expr = ColumnReference(name, expr.data_type) - if name in condition_names: - continue - if ref_expr not in used_cols: - new_name: str = name - idx: int = 0 - while new_name in new_project_columns: - idx += 1 - new_name = f"{name}_{idx}" - new_ref = ColumnReference(name, expr.data_type) - new_project_columns[new_name] = new_ref - transfer_substitutions[ref_expr] = new_ref - - node._input = project.copy(columns=new_project_columns) - + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): - ref_expr = ColumnReference(name, expr.data_type) new_expr: RelationalExpression = apply_substitution( expr, transfer_substitutions, {} ) if (not contains_window(new_expr)) and ( (name in condition_names) != (name in output_names) ): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type) substitutions[ref_expr] = 
apply_substitution( expr, transfer_substitutions, {} ) diff --git a/pydough/relational/rel_util.py b/pydough/relational/rel_util.py index c0423f91e..34f485325 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -306,6 +306,8 @@ def build_filter( assert isinstance(new_join, Join) new_join.condition = condition new_join.cardinality = new_join.cardinality.add_potential_filter() + if columns is not None: + return Project(new_join, columns) return new_join # Otherwise, just return a new filter node with the new condition on top diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 1f5ca48a2..ef6c8f5ca 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 1f5ca48a2..ef6c8f5ca 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 8449c01ff..ee63e2821 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -2,8 +2,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discou JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t0.n_no_tax_discount, 'n_orders': t0.n_orders}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t1.n_no_tax_discount, 'n_orders': t0.n_orders}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 23c6a811a..126aa69c8 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 0e7f831a2..642465175 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,13 +1,13 @@ ROOT(columns=[('key', 
o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 
'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 
'sum_sum_p_retailprice': sum_sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 9d8b97da5..dec2085d2 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) - PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) + PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': 
o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index caee91153..738a980a1 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': 
o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index 8b94ac8f2..e83bd7ade 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,7 +1,7 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) FILTER(condition=RANKING(args=[], partition=[txn_day], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) - FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) - PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) + FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day_1}) + PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day_1': txn_day}) PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 
'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 28100ecde..c436e164f 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,8 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + PROJECT(columns={'o_totalprice_1': o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': 
c_mktsegment}) diff --git a/tests/test_sql_refsols/time_threshold_reached_ansi.sql b/tests/test_sql_refsols/time_threshold_reached_ansi.sql index 853086170..08eebf4aa 100644 --- a/tests/test_sql_refsols/time_threshold_reached_ansi.sql +++ b/tests/test_sql_refsols/time_threshold_reached_ansi.sql @@ -3,8 +3,8 @@ WITH _t3 AS ( ( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) / SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, - sbtxdatetime, - DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day + DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day_1, + sbtxdatetime FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 @@ -15,7 +15,7 @@ WITH _t3 AS ( WHERE pct_of_day >= 50.0 QUALIFY - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day NULLS LAST) = 1 + ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day NULLS LAST) = 1 ) SELECT sbtxdatetime AS date_time diff --git a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql index 5e47efb24..c69bb3b85 100644 --- a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql +++ b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql @@ -3,15 +3,15 @@ WITH _t3 AS ( CAST(( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day') ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) AS REAL) / SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day')) AS pct_of_day, - sbtxdatetime, - DATE(sbtxdatetime, 'start of day') AS txn_day + DATE(sbtxdatetime, 'start of day') AS txn_day_1, + sbtxdatetime FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 ), _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY 
pct_of_day) AS _w + ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day) AS _w FROM _t3 WHERE pct_of_day >= 50.0 From 38272764d9c38974e4843d1a5a375cfe978a9b4f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 12 Jul 2025 03:51:18 -0400 Subject: [PATCH 09/97] Finished dealing with JOIN pull-up --- pydough/conversion/projection_pullup.py | 61 +++++++++++++++++-- pydough/conversion/relational_converter.py | 4 ++ pydough/relational/rel_util.py | 49 +++++++++++++++ .../agg_orders_by_year_month_just_europe.txt | 16 ++--- .../agg_orders_by_year_month_vs_europe.txt | 16 ++--- tests/test_plan_refsols/aggregate_semi.txt | 6 +- .../aggregate_then_backref.txt | 11 ++-- .../aggregation_analytics_2.txt | 31 +++++----- .../aggregation_analytics_3.txt | 31 +++++----- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 17 +++--- tests/test_plan_refsols/bad_child_reuse_2.txt | 6 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 6 +- tests/test_plan_refsols/common_prefix_a.txt | 8 +-- tests/test_plan_refsols/common_prefix_ad.txt | 12 ++-- tests/test_plan_refsols/common_prefix_al.txt | 20 +++--- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_b.txt | 10 +-- tests/test_plan_refsols/common_prefix_c.txt | 41 +++++++------ tests/test_plan_refsols/common_prefix_d.txt | 35 +++++------ tests/test_plan_refsols/common_prefix_e.txt | 8 +-- tests/test_plan_refsols/common_prefix_f.txt | 10 +-- tests/test_plan_refsols/common_prefix_g.txt | 10 +-- tests/test_plan_refsols/common_prefix_h.txt | 41 +++++++------ tests/test_plan_refsols/common_prefix_i.txt | 6 +- tests/test_plan_refsols/common_prefix_m.txt | 12 ++-- tests/test_plan_refsols/common_prefix_o.txt | 6 +- tests/test_plan_refsols/common_prefix_p.txt | 9 ++- tests/test_plan_refsols/common_prefix_r.txt | 6 +- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/common_prefix_t.txt | 14 ++--- tests/test_plan_refsols/common_prefix_u.txt | 14 ++--- 
tests/test_plan_refsols/common_prefix_x.txt | 6 +- tests/test_plan_refsols/correl_14.txt | 17 +++--- tests/test_plan_refsols/correl_15.txt | 17 +++--- tests/test_plan_refsols/correl_17.txt | 7 +-- tests/test_plan_refsols/correl_18.txt | 13 ++-- tests/test_plan_refsols/correl_20.txt | 15 ++--- tests/test_plan_refsols/correl_24.txt | 4 +- tests/test_plan_refsols/correl_26.txt | 8 +-- tests/test_plan_refsols/correl_27.txt | 8 +-- tests/test_plan_refsols/correl_28.txt | 8 +-- tests/test_plan_refsols/correl_29.txt | 11 ++-- tests/test_plan_refsols/correl_30.txt | 24 ++++---- tests/test_plan_refsols/correl_31.txt | 26 ++++---- .../count_cust_supplier_nation_combos.txt | 13 ++-- .../cumulative_stock_analysis.txt | 6 +- .../customer_largest_order_deltas.txt | 8 +-- .../customer_most_recent_orders.txt | 6 +- tests/test_plan_refsols/dumb_aggregation.txt | 8 +-- .../epoch_culture_events_info.txt | 16 ++--- .../test_plan_refsols/global_calc_backref.txt | 7 +-- tests/test_plan_refsols/hour_minute_day.txt | 8 +-- ...lineitems_access_cust_supplier_nations.txt | 9 ++- .../lines_shipping_vs_customer_region.txt | 11 ++-- .../month_year_sliding_windows.txt | 13 ++-- .../mostly_positive_accounts_per_nation3.txt | 8 +-- .../multi_partition_access_5.txt | 16 ++--- ...ple_simple_aggregations_multiple_calcs.txt | 17 +++--- ...ltiple_simple_aggregations_single_calc.txt | 16 +++-- tests/test_plan_refsols/nation_best_order.txt | 4 +- .../num_positive_accounts_per_nation.txt | 15 +++-- .../orders_sum_line_price.txt | 9 ++- .../orders_sum_vs_count_line_price.txt | 9 ++- tests/test_plan_refsols/part_reduced_size.txt | 10 +-- .../region_acctbal_breakdown.txt | 6 +- .../test_plan_refsols/supplier_best_part.txt | 20 +++--- ...hnograph_incident_rate_by_release_year.txt | 18 +++--- .../technograph_monthly_incident_rate.txt | 42 ++++++------- .../technograph_most_unreliable_products.txt | 6 +- ...umulative_incident_rate_goldcopperstar.txt | 30 ++++----- ..._year_cumulative_incident_rate_overall.txt | 
18 +++--- tests/test_plan_refsols/tpch_q10.txt | 14 ++--- tests/test_plan_refsols/tpch_q11.txt | 31 +++++----- tests/test_plan_refsols/tpch_q12.txt | 8 +-- tests/test_plan_refsols/tpch_q15.txt | 33 +++++----- tests/test_plan_refsols/tpch_q18.txt | 12 ++-- tests/test_plan_refsols/tpch_q20.txt | 13 ++-- tests/test_plan_refsols/tpch_q3.txt | 14 ++--- tests/test_plan_refsols/tpch_q5.txt | 30 ++++----- .../various_aggfuncs_simple.txt | 6 +- .../window_filter_order_10.txt | 13 ++-- .../year_month_nation_orders.txt | 16 ++--- .../defog_broker_adv16_ansi.sql | 13 ++-- .../defog_broker_adv16_sqlite.sql | 13 ++-- .../defog_dealership_basic5_ansi.sql | 10 +-- .../defog_dealership_basic5_sqlite.sql | 10 +-- .../defog_ewallet_adv11_ansi.sql | 11 ++-- .../defog_ewallet_adv11_sqlite.sql | 21 +++---- ...aph_incident_rate_by_release_year_ansi.sql | 18 +++--- ...h_incident_rate_by_release_year_sqlite.sql | 18 +++--- ...hnograph_most_unreliable_products_ansi.sql | 5 +- ...ograph_most_unreliable_products_sqlite.sql | 5 +- ...tive_incident_rate_goldcopperstar_ansi.sql | 16 ++--- ...ve_incident_rate_goldcopperstar_sqlite.sql | 16 ++--- ..._cumulative_incident_rate_overall_ansi.sql | 16 ++--- ...umulative_incident_rate_overall_sqlite.sql | 16 ++--- tests/test_sql_refsols/tpch_q11_ansi.sql | 14 +++-- tests/test_sql_refsols/tpch_q11_sqlite.sql | 14 +++-- tests/test_sql_refsols/tpch_q15_ansi.sql | 24 ++++---- tests/test_sql_refsols/tpch_q15_sqlite.sql | 24 ++++---- tests/test_sql_refsols/tpch_q18_ansi.sql | 12 ++-- tests/test_sql_refsols/tpch_q18_sqlite.sql | 12 ++-- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +- 104 files changed, 807 insertions(+), 720 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 07fd199d8..cdfe2a553 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -17,7 +17,11 @@ RelationalNode, RelationalRoot, ) 
-from pydough.relational.rel_util import apply_substitution, contains_window +from pydough.relational.rel_util import ( + add_input_name, + apply_substitution, + contains_window, +) from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, ) @@ -31,11 +35,15 @@ def widen_columns( """ TODO """ - existing_vals: set[RelationalExpression] = set(node.columns.values()) + existing_vals: dict[RelationalExpression, RelationalExpression] = { + expr: ColumnReference(name, expr.data_type) + for name, expr in node.columns.items() + } substitutions: dict[RelationalExpression, RelationalExpression] = {} for input_idx in range(len(node.inputs)): input_node: RelationalNode = node.inputs[input_idx] for name, expr in input_node.columns.items(): + expr = add_input_name(expr, node.default_input_aliases[input_idx]) ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=node.default_input_aliases[input_idx] ) @@ -47,8 +55,11 @@ def widen_columns( new_name = f"{name}_{idx}" new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) node.columns[new_name] = ref_expr + existing_vals[expr] = ref_expr substitutions[ref_expr] = new_ref - return substitutions + else: + substitutions[ref_expr] = existing_vals[expr] + return {k: v for k, v in substitutions.items() if k != v} def pull_non_columns(node: RelationalNode) -> RelationalNode: @@ -71,6 +82,7 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: substitutions: dict[RelationalExpression, RelationalExpression] = widen_columns( node ) + substitutions = {k: add_input_name(v, None) for k, v in substitutions.items()} for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) @@ -84,6 +96,45 @@ def pull_project_into_join(node: Join, input_index: int) -> None: if not isinstance(node.inputs[input_index], Project): return + project = node.inputs[input_index] + assert isinstance(project, Project) + + 
input_name: str | None = node.default_input_aliases[input_index] + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + node.condition.accept(finder) + condition_cols: set[ColumnReference] = finder.get_column_references() + condition_names: set[str] = {col.name for col in condition_cols} + finder.reset() + for expr in node.columns.values(): + expr.accept(finder) + output_cols: set[ColumnReference] = finder.get_column_references() + output_names: set[str] = {col.name for col in output_cols} + + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + + substitutions: dict[RelationalExpression, RelationalExpression] = {} + for name, expr in project.columns.items(): + new_expr: RelationalExpression = add_input_name( + apply_substitution(expr, transfer_substitutions, {}), input_name + ) + if (not contains_window(new_expr)) and ( + (name in condition_names) != (name in output_names) + ): + ref_expr: ColumnReference = ColumnReference( + name, expr.data_type, input_name=input_name + ) + substitutions[ref_expr] = new_expr + + node._condition = apply_substitution(node.condition, substitutions, {}) + node._columns = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.columns.items() + } + def pull_project_into_filter(node: Filter) -> None: """ @@ -117,9 +168,7 @@ def pull_project_into_filter(node: Filter) -> None: (name in condition_names) != (name in output_names) ): ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = apply_substitution( - expr, transfer_substitutions, {} - ) + substitutions[ref_expr] = new_expr node._condition = apply_substitution(node.condition, substitutions, {}) node._columns = { name: apply_substitution(expr, substitutions, {}) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 7c039253d..8da531ed3 100644 --- a/pydough/conversion/relational_converter.py 
+++ b/pydough/conversion/relational_converter.py @@ -1443,7 +1443,11 @@ def optimize_relational_tree( root = bubble_column_names(root) # Step 8: run projection pullup. + # print() + # print(root.to_tree_string()) root = confirm_root(pullup_projections(root)) + # print() + # print(root.to_tree_string()) # Step 9: re-run filter pushdown root._input = push_filters(root.input, set()) diff --git a/pydough/relational/rel_util.py b/pydough/relational/rel_util.py index 34f485325..91e5919f9 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -4,6 +4,7 @@ __all__ = [ "add_expr_uses", + "add_input_name", "apply_substitution", "bubble_uniqueness", "build_filter", @@ -764,3 +765,51 @@ def apply_substitution( # For all other cases, just return the expression as is. return expr + + +def add_input_name( + expr: RelationalExpression, input_name: str | None +) -> RelationalExpression: + """ + Adds an input name to all column references inside the given expression. + + Args: + `expr`: The expression to add the input name to its contents. + `input_name`: The input name to add. + + Returns: + The expression with the input name added to all contents, if + applicable. + """ + if isinstance(expr, ColumnReference): + return expr.with_input(input_name) + + # For call expressions, recursively transform the inputs. + if isinstance(expr, CallExpression): + return CallExpression( + expr.op, + expr.data_type, + [add_input_name(arg, input_name) for arg in expr.inputs], + ) + + # For window call expressions, recursively transform the inputs, partition + # inputs, and order inputs. 
+ if isinstance(expr, WindowCallExpression): + return WindowCallExpression( + expr.op, + expr.data_type, + [add_input_name(arg, input_name) for arg in expr.inputs], + [add_input_name(arg, input_name) for arg in expr.partition_inputs], + [ + ExpressionSortInfo( + add_input_name(order_arg.expr, input_name), + order_arg.ascending, + order_arg.nulls_first, + ) + for order_arg in expr.order_inputs + ], + expr.kwargs, + ) + + # For all other cases, just return the expression as is. + return expr diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index bd02f42ef..e4eb7a12f 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -4,12 +4,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_ PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'year': t0.year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_custkey': o_custkey, 'year': YEAR(o_orderdate)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 1d39cc66b..8279c92ac 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -4,12 +4,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'year': t0.year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_custkey': o_custkey, 'year': YEAR(o_orderdate)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index f4f148cb8..f8dbb71a3 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('name', s_name), ('num_10parts', num_10parts), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', sum_price_of_10parts)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'num_10parts': t1.num_10parts, 's_name': t0.s_name, 'sum_price_of_10parts': 
t1.sum_price_of_10parts}) +ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - PROJECT(columns={'avg_p_retailprice': avg_p_retailprice, 'num_10parts': DEFAULT_TO(n_rows, 0:numeric), 'ps_suppkey': ps_suppkey, 'sum_price_of_10parts': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + PROJECT(columns={'avg_p_retailprice_1': avg_p_retailprice, 'n_rows': n_rows, 'ps_suppkey': ps_suppkey, 'sum_p_retailprice': sum_p_retailprice}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 0f0986ae7..8fa56b648 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / total_quantity)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'total_quantity': t0.total_quantity}) - 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'total_quantity': t1.total_quantity}) +ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'total_quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index d0b6e3f6e..365ac3d0e 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,17 +1,18 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=4, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, 
orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'revenue_generated': t0.revenue_generated}) - PROJECT(columns={'anything_ps_partkey': anything_ps_partkey, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': 
l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) + JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + 
FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 061e03b43..82e33e815 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,17 +1,18 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', revenue_ratio)], orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'p_name': p_name, 'revenue_ratio': revenue_ratio}, orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'revenue_ratio': t0.revenue_ratio}) - PROJECT(columns={'anything_ps_partkey': anything_ps_partkey, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - 
PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'p_name': p_name, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / 
DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) + JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + 
FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index 949ff8a23..7bc4311aa 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', avg_bal_without_debt_erasure)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_bal_without_debt_erasure': t1.avg_bal_without_debt_erasure, 'r_name': t0.r_name}) +ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_expr_1 / sum_count_expr_1)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'avg_bal_without_debt_erasure': sum_sum_expr_1 / sum_count_expr_1, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(expr_1), 'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': LARGEST(c_acctbal, 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(expr_1), 'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': LARGEST(c_acctbal, 0:numeric)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index ef6c8f5ca..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], 
partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index ef6c8f5ca..1f5ca48a2 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows_1, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows_1': n_rows_1}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows_1': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': 
DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index e595a8632..94e733398 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -2,8 +2,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey}) + PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 00ebd7afd..2e01acf9c 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,10 +1,10 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', qty_shipped)], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'qty_shipped': t1.qty_shipped, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric), 's_name': s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index ee63e2821..11b514f75 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,16 +1,16 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t0.n_no_tax_discount, 'n_orders': t0.n_orders}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': 
t1.n_no_tax_discount, 'n_orders': t0.n_orders}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - PROJECT(columns={'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey}) + PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': n_orders}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 126aa69c8..23c6a811a 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 76bd980c0..4d1bb2447 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -2,12 +2,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': 
s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index b37c12250..34e0d2986 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,21 +1,22 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': 
t1.sum_sum_n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_nations': n_nations, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index d379695ad..078ad6f20 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,26 +1,23 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', n_orders_94), ('n_orders_95', n_orders_95), ('n_orders_96', n_orders_96)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders_94': t1.n_orders_94, 'n_orders_95': t1.n_orders_95, 'n_orders_96': t1.n_orders_96, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, 
aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + PROJECT(columns={'n_nations': n_nations, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': 
t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index d39cb9f8f..ba7632a6d 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -2,8 +2,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index c71a26a59..dd8ca64e5 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -2,12 +2,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) - PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 171bc4f3e..848a95bb5 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -2,12 +2,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 
'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.agg_2, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'agg_2': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 17e99a1c8..eb6de35e5 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ 
b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,21 +1,22 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_0, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_0': t0.agg_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'agg_0': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_nations': n_nations, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + 
AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 0f7899049..22743c469 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', n_selected_orders)], orderings=[(n_rows):desc_last, (n_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': n_selected_orders}, orderings=[(n_rows):desc_last, (n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'n_selected_orders': t1.n_selected_orders}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'n_selected_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) + PROJECT(columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) diff --git 
a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 80566e1b0..65b095b33 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,9 +1,9 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum), ('nation_name', n_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': n_selected_suppliers, 'selected_suppliers_avg': selected_suppliers_avg, 'selected_suppliers_sum': selected_suppliers_sum}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_selected_suppliers': t1.n_selected_suppliers, 'selected_suppliers_avg': t1.selected_suppliers_avg, 'selected_suppliers_sum': t1.selected_suppliers_sum}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_selected_suppliers': t0.n_selected_suppliers, 'selected_suppliers_avg': t0.selected_suppliers_avg, 'selected_suppliers_sum': t0.selected_suppliers_sum}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': 
ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) @@ -15,5 +15,5 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_supplie SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), 
columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 642465175..03557163c 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,8 +1,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - 
PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 
'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 9d56b7acd..4678a87a4 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', c_name), ('n_orders', n_orders), ('n_parts_ordered', n_parts_ordered), ('n_distinct_parts', n_distinct_parts)], orderings=[(ordering_3):asc_first, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_distinct_parts': n_distinct_parts, 'n_orders': n_orders, 'n_parts_ordered': n_parts_ordered, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_distinct_parts': t1.n_distinct_parts, 'n_orders': t0.n_orders, 'n_parts_ordered': t1.n_parts_ordered, 'ordering_3': t1.ordering_3}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_orders': t1.n_orders}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows_1, 0:numeric), 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - PROJECT(columns={'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric)}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 7167a6bc8..355dc9ad3 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'total_spent': t1.total_spent}) 
- SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_anything_l_extendedprice': MAX(anything_anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_o_totalprice': SUM(o_totalprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_anything_l_extendedprice': t1.anything_anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'sum_n_rows': t1.sum_n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index dec2085d2..9d8b97da5 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), 
('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) - PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) + PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 7b3cc0556..e350ba091 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'total_qty': t1.total_qty}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': 
t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index dd060fac2..b76d2a813 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'total_qty': t1.total_qty}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': 
c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 30bac545e..9a5054bce 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_orders': t1.n_orders}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) 
AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 7c453ab94..feffdfb1a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,18 +1,17 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': 
t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'ps_suppkey': ps_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_1': t1.expr_1, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index d31f6b6ff..6d7af3f9c 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,21 +1,20 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == 
t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'ps_suppkey': ps_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_1': t1.expr_1, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 
1:numeric, 0:numeric), 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 51fe077d4..20bcf38c9 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,6 +1,5 @@ ROOT(columns=[('fullname', fname)], orderings=[(fname):asc_first]) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), lname)}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lname': t0.lname, 'r_name': t1.r_name}) - PROJECT(columns={'lname': LOWER(n_name), 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 900ec9029..5f6ca684d 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,12 +1,11 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) AGGREGATE(keys={}, 
aggregations={'sum_n_above_avg': SUM(n_above_avg)}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * total_price_sum, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'total_price_sum': t0.total_price_sum}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'total_price_sum': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index a480ec3e3..0887475a3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,14 +1,15 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey}) + FILTER(condition=n_name == n_name, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 1217865d6..d179a88b7 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,8 +1,8 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orders_in_range)], orderings=[(year_7):asc_first, (month_6):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'month_6': ANYTHING(month), 'n_orders_in_range': COUNT(), 'year_7': ANYTHING(year)}) FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': 
t0.year}) - PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice_1, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) + PROJECT(columns={'avg_o_totalprice_1': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index ba23e8e7a..e7c7cde56 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -2,8 +2,8 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': 
t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric, 'o_orderkey': o_orderkey}) + PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) @@ -13,8 +13,8 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': 
t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 1d43f55ec..ac9583af3 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -3,8 +3,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) + PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -13,8 +13,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 8c226d189..3f6839aaf 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -3,16 +3,16 @@ ROOT(columns=[('nation_name', anything_anything_n_name), 
('n_selected_purchases' AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) + PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index f290101f4..29bf6bb22 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_above_avg_customers': t0.n_above_avg_customers, 'n_above_avg_suppliers': t1.n_above_avg_suppliers}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_above_avg_customers': t0.n_above_avg_customers}) - PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_above_avg_customers': DEFAULT_TO(n_rows, 0:numeric)}) + PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_above_avg_customers': DEFAULT_TO(n_rows, 0:numeric), 'n_above_avg_suppliers': DEFAULT_TO(n_rows_1, 0:numeric)}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) @@ -14,9 +14,8 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_above_avg_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 46f53cd79..9f4248442 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,20 +1,20 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': region_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t0.region_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'region_name': t1.region_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - PROJECT(columns={'r_regionkey': r_regionkey, 'region_name': LOWER(r_name)}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': 
t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index 66be22e09..c868fedf4 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,17 +1,17 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'revenue': t0.revenue}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'revenue': t1.revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == 
'1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': 
o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index e1497fe3a..21bb4c0e6 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -6,16 +6,15 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_0': t0.agg_0, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': 
t0.n_name, 'sum_l_extendedprice': t1.sum_l_extendedprice, 'year': t0.year}) - PROJECT(columns={'agg_0': 1:numeric, 'n_name': n_name, 'o_orderkey': o_orderkey, 'year': year}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'year': t1.year}) + PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, 
aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/cumulative_stock_analysis.txt b/tests/test_plan_refsols/cumulative_stock_analysis.txt index 3d1c1865b..f0c0d24b5 100644 --- a/tests/test_plan_refsols/cumulative_stock_analysis.txt +++ b/tests/test_plan_refsols/cumulative_stock_analysis.txt @@ -1,7 +1,7 @@ ROOT(columns=[('date_time', sbTxDateTime), ('txn_within_day', txn_within_day), ('n_buys_within_day', n_buys_within_day), ('pct_apple_txns', pct_apple_txns), ('share_change', share_change), ('rolling_avg_amount', rolling_avg_amount)], orderings=[(sbTxDateTime):asc_first]) PROJECT(columns={'n_buys_within_day': RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True), 'pct_apple_txns': ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'rolling_avg_amount': ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'sbTxDateTime': sbTxDateTime, 'share_change': RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 'txn_within_day': RELSIZE(args=[], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType, 'txn_day': t0.txn_day}) - PROJECT(columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) + PROJECT(columns={'sbTickerSymbol': sbTickerSymbol, 'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxType': sbTxType, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index d57301541..1fa05bd28 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'largest_diff': t1.largest_diff}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - PROJECT(columns={'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff), 'o_custkey': o_custkey}) + PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 7412f2b33..52700565c 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], 
orderings=[(total_recent_value):desc_last]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'total_recent_value': t1.total_recent_value}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - PROJECT(columns={'o_custkey': o_custkey, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index b72dc0295..b178db074 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', a3), ('a4', a4), ('a5', a5), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'a3': t1.a3, 'a4': t1.a4, 'a5': t1.a5, 'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': 
t1.r_regionkey}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) - PROJECT(columns={'a3': DEFAULT_TO(r_regionkey, 0:numeric), 'a4': IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric), 'a5': 1:numeric, 'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'a3': DEFAULT_TO(r_regionkey, 0:numeric), 'a4': IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric), 'a5': 1:numeric, 'n_name': n_name, 'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index be5415490..a7a2bb160 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,15 +1,15 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', event_year), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 
'event_year': t0.event_year, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 'event_year': t0.event_year, 's_name': t1.s_name}) - PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'event_year': YEAR(ev_dt)}) + PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': YEAR(ev_dt), 's_name': s_name, 't_name': t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=SEASONS, 
columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/global_calc_backref.txt b/tests/test_plan_refsols/global_calc_backref.txt index 4d4e194ef..6b7d52375 100644 --- a/tests/test_plan_refsols/global_calc_backref.txt +++ b/tests/test_plan_refsols/global_calc_backref.txt @@ -1,5 +1,2 @@ -ROOT(columns=[('part_name', p_name), ('is_above_cutoff', p_retailprice > a), ('is_nickel', CONTAINS(p_type, b))], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'a': t0.a, 'b': t0.b, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) - PROJECT(columns={'a': 28.15:numeric, 'b': 'NICKEL':string}) - EMPTYSINGLETON() - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) +ROOT(columns=[('part_name', p_name), ('is_above_cutoff', p_retailprice > 28.15:numeric), ('is_nickel', CONTAINS(p_type, 'NICKEL':string))], orderings=[]) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/hour_minute_day.txt 
b/tests/test_plan_refsols/hour_minute_day.txt index bfacee546..0ad3b9efe 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('_expr0', _expr0), ('_expr1', _expr1), ('_expr2', _expr2)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'_expr0': t0._expr0, '_expr1': t0._expr1, '_expr2': t0._expr2, 'sbTxId': t0.sbTxId}) - PROJECT(columns={'_expr0': HOUR(sbTxDateTime), '_expr1': MINUTE(sbTxDateTime), '_expr2': SECOND(sbTxDateTime), 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) + PROJECT(columns={'_expr0': HOUR(sbTxDateTime), '_expr1': MINUTE(sbTxDateTime), '_expr2': SECOND(sbTxDateTime), 'sbTxId': sbTxId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) - FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) + FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index cd92282e3..c9f1900fe 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('ship_year', ship_year), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', value)], 
orderings=[]) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ship_year': t0.ship_year, 'supplier_nation': t0.n_name, 'value': t0.value}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name, 'ship_year': t0.ship_year, 'value': t0.value}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'ship_year': YEAR(l_shipdate), 'value': l_extendedprice * 1.0:numeric - l_discount}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index 6d97a53f1..e06f975c8 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('order_year', order_year), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'order_year': t0.order_year, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'order_year': t0.order_year, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'order_year': t1.order_year, 'r_name': t0.r_name}) +ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 
'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'order_year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index a0098dd14..d438e6e1e 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ 
b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'month_total_spent': t1.month_total_spent, 'year': t1.year}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) @@ -9,8 +9,7 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (m PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': 
o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 21b348784..d185fd519 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'suppliers_in_black': t0.suppliers_in_black, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'n_name': 
n_name, 'n_nationkey': n_nationkey, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric)}) + PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index b21fc51af..bca9f137e 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) @@ -10,12 +10,12 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, 
columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 
'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 5f21161b4..6227e214c 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,10 +1,11 @@ -ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', total_consumer_value_a), ('total_supplier_value', total_supplier_value_a), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'total_consumer_value_a': t0.total_consumer_value_a, 'total_supplier_value_a': t1.total_supplier_value_a}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'total_consumer_value_a': t1.total_consumer_value_a}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_c_acctbal': avg_c_acctbal, 'c_nationkey': c_nationkey, 'max_c_acctbal': max_c_acctbal, 'total_consumer_value_a': DEFAULT_TO(sum_c_acctbal, 0:numeric)}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) - 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal': max_s_acctbal, 's_nationkey': s_nationkey, 'total_supplier_value_a': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) +ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal_1, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'n_nationkey': n_nationkey, 'sum_c_acctbal': sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'c_nationkey': c_nationkey, 'max_c_acctbal_1': max_c_acctbal, 'sum_c_acctbal': sum_c_acctbal}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal}) AGGREGATE(keys={'s_nationkey': 
s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index 5209b11c8..2e2b636f1 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,10 +1,8 @@ -ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', consumer_value), ('producer_value', producer_value)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'consumer_value': t0.consumer_value, 'n_nationkey': t0.n_nationkey, 'producer_value': t1.producer_value}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'consumer_value': t1.consumer_value, 'n_nationkey': t0.n_nationkey}) +ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - PROJECT(columns={'c_nationkey': c_nationkey, 'consumer_value': DEFAULT_TO(sum_c_acctbal, 0:numeric)}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - 
PROJECT(columns={'producer_value': DEFAULT_TO(sum_s_acctbal, 0:numeric), 's_nationkey': s_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index 738a980a1..caee91153 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 
'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index 12ee5e43d..7706c56b5 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'suppliers_in_black': t0.suppliers_in_black, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': 
s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index 42819ba5f..c3c7ca09c 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('okey', o_orderkey), ('lsum', lsum)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lsum': t1.lsum, 'o_orderkey': t0.o_orderkey}) +ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': 
t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'lsum': DEFAULT_TO(sum_l_extendedprice, 0:numeric)}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 766ea2613..139b7720a 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('okey', o_orderkey), ('lavg', lavg)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lavg': t1.lavg, 'o_orderkey': t0.o_orderkey}) +ROOT(columns=[('okey', o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'lavg': DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, 
aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 5f411333a..3b22ff6b8 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,8 +1,8 @@ ROOT(columns=[('reduced_size', reduced_size), ('retail_price_int', retail_price_int), ('message', message), ('discount', l_discount), ('date_dmy', date_dmy), ('date_md', date_md), ('am_pm', am_pm)], orderings=[(l_discount):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'am_pm': am_pm, 'date_dmy': date_dmy, 'date_md': date_md, 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(l_discount):desc_last]) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'am_pm': t1.am_pm, 'date_dmy': t1.date_dmy, 'date_md': t1.date_md, 'l_discount': t1.l_discount, 'message': t0.message, 'reduced_size': t0.reduced_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'message': message, 'p_partkey': p_partkey, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'message': JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size)), 'p_partkey': p_partkey, 'reduced_size': FLOAT(p_size / 2.5:numeric), 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) - PROJECT(columns={'am_pm': STRING(l_receiptdate, '%H:%M%p':string), 'date_dmy': STRING(l_receiptdate, '%d-%m-%Y':string), 'date_md': STRING(l_receiptdate, '%m/%d':string), 'l_discount': l_discount, 
'l_partkey': l_partkey}) + PROJECT(columns={'am_pm': STRING(l_receiptdate, '%H:%M%p':string), 'date_dmy': STRING(l_receiptdate, '%d-%m-%Y':string), 'date_md': STRING(l_receiptdate, '%m/%d':string), 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'message': t0.message, 'reduced_size': t0.reduced_size, 'retail_price_int': t0.retail_price_int}) + LIMIT(limit=Literal(value=2, type=NumericType()), columns={'message': message, 'p_partkey': p_partkey, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'message': JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size)), 'p_partkey': p_partkey, 'reduced_size': FLOAT(p_size / 2.5:numeric), 'retail_price_int': INTEGER(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index 0cee405f0..1dd3998d9 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -2,7 +2,7 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_bla JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey, 'negative_acctbal': t1.negative_acctbal, 'non_negative_acctbal': t1.non_negative_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) + PROJECT(columns={'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 4a5f82679..223d5aab8 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -5,12 +5,14 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'quantity': t0.quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'quantity': t1.quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows': n_rows, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows_1': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + 
PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows_1': n_rows, 'sum_l_quantity': sum_l_quantity}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index fa891ae1e..acb036880 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -2,15 +2,15 @@ ROOT(columns=[('year', release_year), ('ir', ir)], orderings=[(release_year):asc PROJECT(columns={'ir': ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric), 'release_year': release_year}) JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'release_year': t1.release_year}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) - PROJECT(columns={'pr_id': pr_id, 'release_year': YEAR(pr_release)}) + PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'release_year': t0.release_year}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'release_year': t1.release_year}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - PROJECT(columns={'pr_id': pr_id, 'release_year': YEAR(pr_release)}) + PROJECT(columns={'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt 
b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index ab43e7a9a..6a96f0f3d 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,29 +1,29 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) PROJECT(columns={'ir': ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric), 'month': month, 'month_0': JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': year}) + PROJECT(columns={'expr_3': expr_3, 'month': MONTH(ca_dt), 'n_rows': n_rows, 'year': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, 
cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': 
de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 931f72ef9..df4147ff6 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ 
b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,8 +1,8 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}, orderings=[(ir):desc_last]) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ir': t1.ir, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - PROJECT(columns={'de_product_id': de_product_id, 'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)}) + PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 3bbbef4f1..191147ea3 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ 
b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -7,23 +7,23 @@ ROOT(columns=[('years_since_release', years_since_release), ('cum_ir', cum_ir), FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_4': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + 
FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, 
columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index e333d030c..f4222569e 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -4,15 +4,15 @@ ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) 
AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 2dbde582e..f92f003ec 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,14 +1,14 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'REVENUE': t0.REVENUE, 'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': 
t1.n_name}) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'expr_1': t1.expr_1, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 575bf0832..e81f6331d 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,22 +1,21 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'VALUE': t1.VALUE, 'min_market_share': t0.min_market_share, 'ps_partkey': t1.ps_partkey}) - PROJECT(columns={'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric}) + FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) + JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'metric': t0.metric}) - PROJECT(columns={'metric': ps_supplycost * ps_availqty, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'metric': ps_supplycost * ps_availqty}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'expr_2': t0.expr_2, 'ps_partkey': t0.ps_partkey}) - PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'expr_2': ps_supplycost * 
ps_availqty, 'ps_partkey': ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index 821150be7..6a11fe1ab 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,8 +1,8 @@ ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', HIGH_LINE_COUNT), ('LOW_LINE_COUNT', LOW_LINE_COUNT)], orderings=[(l_shipmode):asc_first]) PROJECT(columns={'HIGH_LINE_COUNT': DEFAULT_TO(sum_is_high_priority, 0:numeric), 'LOW_LINE_COUNT': DEFAULT_TO(sum_expr_2, 0:numeric), 'l_shipmode': l_shipmode}) AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_2': t1.expr_2, 'is_high_priority': t1.is_high_priority, 'l_shipmode': 
t0.l_shipmode}) - FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) - PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'o_orderkey': o_orderkey}) + PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index 4d4ff236b..877cbcd22 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,17 +1,18 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', TOTAL_REVENUE)], orderings=[(s_suppkey):asc_first]) - JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & 
t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'TOTAL_REVENUE': t1.TOTAL_REVENUE, 's_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'total_revenue': t1.total_revenue}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - PROJECT(columns={'l_suppkey': l_suppkey, 'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) - PROJECT(columns={'TOTAL_REVENUE': DEFAULT_TO(sum_expr_3, 0:numeric), 'l_suppkey': l_suppkey, 'sum_expr_3': sum_expr_3}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) - PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'TOTAL_REVENUE': DEFAULT_TO(sum_expr_3, 0:numeric), 's_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3_1, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) + PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) + PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) + PROJECT(columns={'l_suppkey': l_suppkey, 'sum_expr_3': sum_expr_3, 'sum_expr_3_1': sum_expr_3}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) + PROJECT(columns={'expr_3': l_extendedprice 
* 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index fc34417b8..8de1daa53 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,10 +1,10 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'TOTAL_QUANTITY': t1.TOTAL_QUANTITY, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': 
TOTAL_QUANTITY, 'l_orderkey': l_orderkey}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'l_orderkey': l_orderkey}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index f025d4504..81644d8d2 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -7,13 +7,12 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'part_qty': t1.part_qty, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'part_qty': t1.part_qty}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - PROJECT(columns={'l_partkey': l_partkey, 'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) 
diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index e80501af0..61ed28ec5 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -2,12 +2,12 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', REVENUE), ('O_ORDERDATE', LIMIT(limit=Literal(value=10, type=NumericType()), columns={'REVENUE': REVENUE, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, orderings=[(REVENUE):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first]) PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'expr_1': t1.expr_1, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) - FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': 
o_orderdate, 'o_shippriority': o_shippriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index a6f56996d..59ff61eef 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,19 +1,19 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t0.value}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'value': t1.value}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - PROJECT(columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'value': l_extendedprice * 1:numeric - l_discount}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index 0d535541a..b4f612231 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', avg_bal), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_bal': t1.avg_bal, 'count_c_acctbal': t1.count_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) +ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_bal': DEFAULT_TO(avg_c_acctbal, 0:numeric), 'c_nationkey': c_nationkey, 'count_c_acctbal': count_c_acctbal, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_rows': n_rows, 'sum_c_acctbal': sum_c_acctbal}) + PROJECT(columns={'avg_c_acctbal': avg_c_acctbal, 'c_nationkey': c_nationkey, 'count_c_acctbal_1': 
count_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows_1': n_rows, 'sum_c_acctbal_1': sum_c_acctbal}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index c436e164f..28100ecde 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,9 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - PROJECT(columns={'o_totalprice_1': o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 
'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index 545218244..99d446335 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,13 +1,13 @@ ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_orders': n_orders, 'order_month': order_month, 'order_year': order_year}, orderings=[(n_orders):desc_last]) AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'order_month': t1.order_month, 'order_year': t1.order_year}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) - FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - PROJECT(columns={'o_custkey': o_custkey, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) + PROJECT(columns={'n_name': n_name, 'order_month': 
MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_sql_refsols/defog_broker_adv16_ansi.sql b/tests/test_sql_refsols/defog_broker_adv16_ansi.sql index dcfc73604..383398ff4 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_ansi.sql @@ -1,10 +1,7 @@ WITH _s1 AS ( SELECT - ( - 100.0 * ( - COALESCE(SUM(sbtxamount), 0) - COALESCE(SUM(sbtxtax + sbtxcommission), 0) - ) - ) / COALESCE(SUM(sbtxamount), 0) AS spm, + SUM(sbtxtax + sbtxcommission) AS sum_expr_2, + SUM(sbtxamount) AS sum_sbtxamount, sbtxtickerid FROM main.sbtransaction WHERE @@ -14,7 +11,11 @@ WITH _s1 AS ( ) SELECT sbticker.sbtickersymbol AS symbol, - _s1.spm AS SPM + ( + 100.0 * ( + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr_2, 0) + ) + ) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker JOIN _s1 AS _s1 ON _s1.sbtxtickerid = 
sbticker.sbtickerid diff --git a/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql index e57a0b702..0e6847d55 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql @@ -1,10 +1,7 @@ WITH _s1 AS ( SELECT - CAST(( - 100.0 * ( - COALESCE(SUM(sbtxamount), 0) - COALESCE(SUM(sbtxtax + sbtxcommission), 0) - ) - ) AS REAL) / COALESCE(SUM(sbtxamount), 0) AS spm, + SUM(sbtxtax + sbtxcommission) AS sum_expr_2, + SUM(sbtxamount) AS sum_sbtxamount, sbtxtickerid FROM main.sbtransaction WHERE @@ -14,7 +11,11 @@ WITH _s1 AS ( ) SELECT sbticker.sbtickersymbol AS symbol, - _s1.spm AS SPM + CAST(( + 100.0 * ( + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr_2, 0) + ) + ) AS REAL) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 6c4265832..9a9471dea 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -1,7 +1,7 @@ WITH _s1 AS ( SELECT - COALESCE(SUM(sale_price), 0) AS total_revenue, - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, + SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales WHERE @@ -12,11 +12,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, - _s1.total_revenue + _s1.n_rows_1 AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 2856c1f24..9f797c2bc 100644 --- 
a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -1,7 +1,7 @@ WITH _s1 AS ( SELECT - COALESCE(SUM(sale_price), 0) AS total_revenue, - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, + SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales WHERE @@ -14,11 +14,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, - _s1.total_revenue + _s1.n_rows_1 AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index 2cfb71c59..af2c3f84f 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -1,11 +1,8 @@ WITH _s1 AS ( SELECT - COALESCE( - SUM( - DATEDIFF(CAST(session_end_ts AS DATETIME), CAST(session_start_ts AS DATETIME), SECOND) - ), - 0 - ) AS total_duration, + SUM( + DATEDIFF(CAST(session_end_ts AS DATETIME), CAST(session_start_ts AS DATETIME), SECOND) + ) AS sum_duration, user_id FROM main.user_sessions WHERE @@ -15,7 +12,7 @@ WITH _s1 AS ( ) SELECT users.uid, - _s1.total_duration + COALESCE(_s1.sum_duration, 0) AS total_duration FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index 793b9b9b3..93fddddfd 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -1,17 +1,14 @@ WITH _s1 AS ( SELECT - COALESCE( - SUM( + SUM( + ( ( - ( - CAST(( - JULIANDAY(DATE(session_end_ts, 'start of day')) - JULIANDAY(DATE(session_start_ts, 'start of day')) - ) AS INTEGER) * 24 + 
CAST(STRFTIME('%H', session_end_ts) AS INTEGER) - CAST(STRFTIME('%H', session_start_ts) AS INTEGER) - ) * 60 + CAST(STRFTIME('%M', session_end_ts) AS INTEGER) - CAST(STRFTIME('%M', session_start_ts) AS INTEGER) - ) * 60 + CAST(STRFTIME('%S', session_end_ts) AS INTEGER) - CAST(STRFTIME('%S', session_start_ts) AS INTEGER) - ), - 0 - ) AS total_duration, + CAST(( + JULIANDAY(DATE(session_end_ts, 'start of day')) - JULIANDAY(DATE(session_start_ts, 'start of day')) + ) AS INTEGER) * 24 + CAST(STRFTIME('%H', session_end_ts) AS INTEGER) - CAST(STRFTIME('%H', session_start_ts) AS INTEGER) + ) * 60 + CAST(STRFTIME('%M', session_end_ts) AS INTEGER) - CAST(STRFTIME('%M', session_start_ts) AS INTEGER) + ) * 60 + CAST(STRFTIME('%S', session_end_ts) AS INTEGER) - CAST(STRFTIME('%S', session_start_ts) AS INTEGER) + ) AS sum_duration, user_id FROM main.user_sessions WHERE @@ -21,7 +18,7 @@ WITH _s1 AS ( ) SELECT users.uid, - _s1.total_duration + COALESCE(_s1.sum_duration, 0) AS total_duration FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index bd2b54392..7a8882408 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -5,7 +5,7 @@ WITH _s0 AS ( FROM main.devices GROUP BY de_product_id -), _t4 AS ( +), _s1 AS ( SELECT pr_id, pr_release @@ -13,23 +13,23 @@ WITH _s0 AS ( ), _s6 AS ( SELECT SUM(_s0.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t4.pr_release AS DATETIME)) AS release_year + EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year FROM _s0 AS _s0 - JOIN _t4 AS _t4 - ON _s0.de_product_id = _t4.pr_id + JOIN _s1 AS _s1 + ON _s0.de_product_id = _s1.pr_id GROUP BY - EXTRACT(YEAR FROM CAST(_t4.pr_release AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s1.pr_release AS 
DATETIME)) ), _s7 AS ( SELECT COUNT(*) AS n_rows, - EXTRACT(YEAR FROM CAST(_t6.pr_release AS DATETIME)) AS release_year + EXTRACT(YEAR FROM CAST(_s3.pr_release AS DATETIME)) AS release_year FROM main.devices AS devices - JOIN _t4 AS _t6 - ON _t6.pr_id = devices.de_product_id + JOIN _s1 AS _s3 + ON _s3.pr_id = devices.de_product_id JOIN main.incidents AS incidents ON devices.de_id = incidents.in_device_id GROUP BY - EXTRACT(YEAR FROM CAST(_t6.pr_release AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s3.pr_release AS DATETIME)) ) SELECT _s6.release_year AS year, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index 0f6777f90..5ef83cf6a 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -5,7 +5,7 @@ WITH _s0 AS ( FROM main.devices GROUP BY de_product_id -), _t4 AS ( +), _s1 AS ( SELECT pr_id, pr_release @@ -13,23 +13,23 @@ WITH _s0 AS ( ), _s6 AS ( SELECT SUM(_s0.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t4.pr_release) AS INTEGER) AS release_year + CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year FROM _s0 AS _s0 - JOIN _t4 AS _t4 - ON _s0.de_product_id = _t4.pr_id + JOIN _s1 AS _s1 + ON _s0.de_product_id = _s1.pr_id GROUP BY - CAST(STRFTIME('%Y', _t4.pr_release) AS INTEGER) + CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) ), _s7 AS ( SELECT COUNT(*) AS n_rows, - CAST(STRFTIME('%Y', _t6.pr_release) AS INTEGER) AS release_year + CAST(STRFTIME('%Y', _s3.pr_release) AS INTEGER) AS release_year FROM main.devices AS devices - JOIN _t4 AS _t6 - ON _t6.pr_id = devices.de_product_id + JOIN _s1 AS _s3 + ON _s3.pr_id = devices.de_product_id JOIN main.incidents AS incidents ON devices.de_id = incidents.in_device_id GROUP BY - CAST(STRFTIME('%Y', _t6.pr_release) AS INTEGER) + CAST(STRFTIME('%Y', _s3.pr_release) AS INTEGER) ) 
SELECT _s6.release_year AS year, diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index 4bf563b04..d4bcd9d17 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -7,7 +7,8 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - ROUND(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) / COUNT(*), 2) AS ir, + COUNT(*) AS n_rows, + SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -21,7 +22,7 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - _s5.ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 268abbb3c..5a99bc7fc 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -7,7 +7,8 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - ROUND(CAST(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) AS ir, + COUNT(*) AS n_rows, + SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -21,7 +22,7 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - _s5.ir + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql 
b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index 1f8c08428..712284e0c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -4,7 +4,7 @@ WITH _s14 AS ( FROM main.products WHERE pr_name = 'GoldCopper-Star' -), _t6 AS ( +), _s6 AS ( SELECT ca_dt FROM main.calendar @@ -19,7 +19,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s6 AS _s0 JOIN main.incidents AS incidents ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices @@ -32,7 +32,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t6 AS _s8 + FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) JOIN _t8 AS _t10 @@ -43,14 +43,14 @@ WITH _s14 AS ( SELECT SUM(_s7.n_rows) AS sum_expr_4, SUM(_s13.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) AS year - FROM _t6 AS _t6 + EXTRACT(YEAR FROM CAST(_s6.ca_dt AS DATETIME)) AS year + FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s6.ca_dt = _s7.ca_dt LEFT JOIN _s13 AS _s13 - ON _s13.ca_dt = _t6.ca_dt + ON _s13.ca_dt = _s6.ca_dt GROUP BY - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s6.ca_dt AS DATETIME)) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index 4611d884e..e49cda5d1 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -4,7 +4,7 @@ WITH _s14 AS ( FROM main.products WHERE pr_name = 'GoldCopper-Star' -), _t6 AS ( 
+), _s6 AS ( SELECT ca_dt FROM main.calendar @@ -19,7 +19,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s6 AS _s0 JOIN main.incidents AS incidents ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices @@ -32,7 +32,7 @@ WITH _s14 AS ( SELECT COUNT(*) AS n_rows, _s8.ca_dt - FROM _t6 AS _s8 + FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') JOIN _t8 AS _t10 @@ -43,14 +43,14 @@ WITH _s14 AS ( SELECT SUM(_s7.n_rows) AS sum_expr_4, SUM(_s13.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) AS year - FROM _t6 AS _t6 + CAST(STRFTIME('%Y', _s6.ca_dt) AS INTEGER) AS year + FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s6.ca_dt = _s7.ca_dt LEFT JOIN _s13 AS _s13 - ON _s13.ca_dt = _t6.ca_dt + ON _s13.ca_dt = _s6.ca_dt GROUP BY - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _s6.ca_dt) AS INTEGER) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index a296d1e1b..337d437df 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _s2 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s2 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) GROUP BY @@ -15,7 +15,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t6 AS _s4 + FROM _s2 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY @@ -24,14 +24,14 @@ WITH _t6 AS ( SELECT SUM(_s3.n_rows) AS 
sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) AS year - FROM _t6 AS _t6 + EXTRACT(YEAR FROM CAST(_s2.ca_dt AS DATETIME)) AS year + FROM _s2 AS _s2 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s3.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s7.ca_dt GROUP BY - EXTRACT(YEAR FROM CAST(_t6.ca_dt AS DATETIME)) + EXTRACT(YEAR FROM CAST(_s2.ca_dt AS DATETIME)) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 783a358a4..61f47822c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _s2 AS ( SELECT ca_dt FROM main.calendar @@ -6,7 +6,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s0.ca_dt - FROM _t6 AS _s0 + FROM _s2 AS _s0 JOIN main.devices AS devices ON _s0.ca_dt = DATE(devices.de_purchase_ts, 'start of day') GROUP BY @@ -15,7 +15,7 @@ WITH _t6 AS ( SELECT COUNT(*) AS n_rows, _s4.ca_dt - FROM _t6 AS _s4 + FROM _s2 AS _s4 JOIN main.incidents AS incidents ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY @@ -24,14 +24,14 @@ WITH _t6 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) AS year - FROM _t6 AS _t6 + CAST(STRFTIME('%Y', _s2.ca_dt) AS INTEGER) AS year + FROM _s2 AS _s2 LEFT JOIN _s3 AS _s3 - ON _s3.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s3.ca_dt LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t6.ca_dt + ON _s2.ca_dt = _s7.ca_dt GROUP BY - CAST(STRFTIME('%Y', _t6.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _s2.ca_dt) AS INTEGER) ), _t0 AS ( SELECT ROUND( diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index 
6659c086b..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) * 0.0001 AS min_market_share + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -20,22 +20,24 @@ WITH _s0 AS ( ON _s0.s_nationkey = _t4.n_nationkey ), _s9 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) AS value, + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, partsupp.ps_partkey FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t8 - ON _s4.s_nationkey = _t8.n_nationkey + JOIN _t4 AS _t7 + ON _s4.s_nationkey = _t7.n_nationkey GROUP BY partsupp.ps_partkey ) SELECT _s9.ps_partkey AS PS_PARTKEY, - _s9.value AS VALUE + COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON _s8.min_market_share < _s9.value + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index 6659c086b..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) * 0.0001 AS min_market_share + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -20,22 +20,24 @@ WITH _s0 AS ( ON _s0.s_nationkey = _t4.n_nationkey ), _s9 AS ( SELECT - COALESCE(SUM(partsupp.ps_supplycost * partsupp.ps_availqty), 0) AS value, + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, 
partsupp.ps_partkey FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t8 - ON _s4.s_nationkey = _t8.n_nationkey + JOIN _t4 AS _t7 + ON _s4.s_nationkey = _t7.n_nationkey GROUP BY partsupp.ps_partkey ) SELECT _s9.ps_partkey AS PS_PARTKEY, - _s9.value AS VALUE + COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON _s8.min_market_share < _s9.value + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index b5292b051..8bee61545 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT l_discount, l_extendedprice, @@ -10,29 +10,29 @@ WITH _t5 AS ( AND l_shipdate >= CAST('1996-01-01' AS DATE) ), _s1 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, + )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t6 GROUP BY l_suppkey ), _s2 AS ( SELECT - MAX(_s1.total_revenue) AS max_revenue + MAX(COALESCE(_s1.sum_expr_2, 0)) AS max_revenue FROM tpch.supplier AS supplier JOIN _s1 AS _s1 ON _s1.l_suppkey = supplier.s_suppkey ), _s5 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, - l_suppkey, + )) AS sum_expr_3, SUM(l_extendedprice * ( 1 - l_discount - )) AS sum_expr_3 - FROM _t5 + )) AS sum_expr_3_1, + l_suppkey + FROM _t6 GROUP BY l_suppkey ) @@ -41,11 +41,11 @@ SELECT supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - _s5.total_revenue AS TOTAL_REVENUE + COALESCE(_s5.sum_expr_3, 0) AS TOTAL_REVENUE FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) AND _s5.l_suppkey = 
supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index ecc0ebcf8..25340d6a8 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t6 AS ( SELECT l_discount, l_extendedprice, @@ -9,29 +9,29 @@ WITH _t5 AS ( l_shipdate < '1996-04-01' AND l_shipdate >= '1996-01-01' ), _s1 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, + )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t6 GROUP BY l_suppkey ), _s2 AS ( SELECT - MAX(_s1.total_revenue) AS max_revenue + MAX(COALESCE(_s1.sum_expr_2, 0)) AS max_revenue FROM tpch.supplier AS supplier JOIN _s1 AS _s1 ON _s1.l_suppkey = supplier.s_suppkey ), _s5 AS ( SELECT - COALESCE(SUM(l_extendedprice * ( + SUM(l_extendedprice * ( 1 - l_discount - )), 0) AS total_revenue, - l_suppkey, + )) AS sum_expr_3, SUM(l_extendedprice * ( 1 - l_discount - )) AS sum_expr_3 - FROM _t5 + )) AS sum_expr_3_1, + l_suppkey + FROM _t6 GROUP BY l_suppkey ) @@ -40,11 +40,11 @@ SELECT supplier.s_name AS S_NAME, supplier.s_address AS S_ADDRESS, supplier.s_phone AS S_PHONE, - _s5.total_revenue AS TOTAL_REVENUE + COALESCE(_s5.sum_expr_3, 0) AS TOTAL_REVENUE FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) AND _s5.l_suppkey = supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q18_ansi.sql b/tests/test_sql_refsols/tpch_q18_ansi.sql index aa9134752..575feb447 100644 --- a/tests/test_sql_refsols/tpch_q18_ansi.sql +++ b/tests/test_sql_refsols/tpch_q18_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _s3 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS 
O_TOTALPRICE, - COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _t1 AS _t1 - ON NOT _t1.sum_l_quantity IS NULL - AND _t1.l_orderkey = orders.o_orderkey - AND _t1.sum_l_quantity > 300 +JOIN _s3 AS _s3 + ON NOT _s3.sum_l_quantity IS NULL + AND _s3.l_orderkey = orders.o_orderkey + AND _s3.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q18_sqlite.sql b/tests/test_sql_refsols/tpch_q18_sqlite.sql index aa9134752..575feb447 100644 --- a/tests/test_sql_refsols/tpch_q18_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q18_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _s3 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _t1 AS _t1 - ON NOT _t1.sum_l_quantity IS NULL - AND _t1.l_orderkey = orders.o_orderkey - AND _t1.sum_l_quantity > 300 +JOIN _s3 AS _s3 + ON NOT _s3.sum_l_quantity IS NULL + AND _s3.l_orderkey = orders.o_orderkey + AND _s3.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 6ee033739..64803c51d 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS part_qty, + SUM(l_quantity) AS sum_l_quantity, l_partkey FROM tpch.lineitem WHERE @@ -10,7 +10,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT part.p_partkey, - _s3.part_qty + _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 ON 
_s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.part_qty, 0) + 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index ff41af883..e5b221d69 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -1,6 +1,6 @@ WITH _s3 AS ( SELECT - COALESCE(SUM(l_quantity), 0) AS part_qty, + SUM(l_quantity) AS sum_l_quantity, l_partkey FROM tpch.lineitem WHERE @@ -10,7 +10,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT part.p_partkey, - _s3.part_qty + _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 ON _s3.l_partkey = part.p_partkey @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(_s5.part_qty, 0) + 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) GROUP BY partsupp.ps_suppkey From 1fff8eabaf640bb2f286db26bf4501099a27bc03 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 12 Jul 2025 20:51:05 -0400 Subject: [PATCH 10/97] Fixed pullup bugs --- pydough/conversion/projection_pullup.py | 6 ++- pydough/sqlglot/sqlglot_relational_visitor.py | 20 +-------- .../aggregate_then_backref.txt | 11 ++--- tests/test_plan_refsols/bad_child_reuse_2.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 4 +- tests/test_plan_refsols/common_prefix_ad.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_aq.txt | 18 ++++---- tests/test_plan_refsols/common_prefix_b.txt | 11 ++--- tests/test_plan_refsols/common_prefix_c.txt | 17 ++++---- tests/test_plan_refsols/common_prefix_d.txt | 29 ++++++++----- tests/test_plan_refsols/common_prefix_f.txt | 11 ++--- tests/test_plan_refsols/common_prefix_g.txt | 11 
++--- tests/test_plan_refsols/common_prefix_h.txt | 17 ++++---- tests/test_plan_refsols/common_prefix_j.txt | 9 ++-- tests/test_plan_refsols/common_prefix_k.txt | 9 ++-- tests/test_plan_refsols/common_prefix_l.txt | 31 +++++++------- tests/test_plan_refsols/common_prefix_m.txt | 32 +++++++------- tests/test_plan_refsols/common_prefix_o.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_p.txt | 13 +++--- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/common_prefix_v.txt | 11 ++--- tests/test_plan_refsols/common_prefix_w.txt | 13 +++--- tests/test_plan_refsols/correl_14.txt | 23 +++++----- tests/test_plan_refsols/correl_15.txt | 30 ++++++------- tests/test_plan_refsols/correl_20.txt | 15 ++++--- tests/test_plan_refsols/correl_26.txt | 21 +++++----- tests/test_plan_refsols/correl_27.txt | 19 +++++---- tests/test_plan_refsols/correl_28.txt | 15 +++---- tests/test_plan_refsols/correl_29.txt | 33 ++++++++------- tests/test_plan_refsols/correl_30.txt | 23 +++++----- tests/test_plan_refsols/correl_31.txt | 27 ++++++------ tests/test_plan_refsols/correl_34.txt | 15 +++---- .../count_cust_supplier_nation_combos.txt | 13 +++--- .../epoch_culture_events_info.txt | 26 +++++++----- ...lineitems_access_cust_supplier_nations.txt | 17 ++++---- .../lines_shipping_vs_customer_region.txt | 22 +++++----- .../mostly_positive_accounts_per_nation3.txt | 17 ++++---- .../multi_partition_access_5.txt | 8 ++-- tests/test_plan_refsols/nation_best_order.txt | 4 +- .../num_positive_accounts_per_nation.txt | 13 +++--- .../test_plan_refsols/supplier_best_part.txt | 8 ++-- ...hnograph_incident_rate_by_release_year.txt | 9 ++-- .../technograph_monthly_incident_rate.txt | 29 ++++++------- ...umulative_incident_rate_goldcopperstar.txt | 25 +++++------ ..._year_cumulative_incident_rate_overall.txt | 15 +++---- tests/test_plan_refsols/tpch_q10.txt | 21 +++++----- tests/test_plan_refsols/tpch_q11.txt | 4 +- tests/test_plan_refsols/tpch_q18.txt | 4 +- 
tests/test_plan_refsols/tpch_q2.txt | 11 ++--- tests/test_plan_refsols/tpch_q20.txt | 15 +++---- tests/test_plan_refsols/tpch_q5.txt | 25 +++++------ .../window_filter_order_10.txt | 13 +++--- tests/test_relational_nodes_to_sqlglot.py | 42 +++++++++++++++++-- .../func_rank_with_filters_a_ansi.sql | 8 ++-- ...technograph_monthly_incident_rate_ansi.sql | 26 ++++++------ ...chnograph_monthly_incident_rate_sqlite.sql | 26 ++++++------ ...tive_incident_rate_goldcopperstar_ansi.sql | 10 ++--- ...ve_incident_rate_goldcopperstar_sqlite.sql | 10 ++--- tests/test_sql_refsols/tpch_q20_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 4 +- 62 files changed, 526 insertions(+), 455 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index cdfe2a553..5a575c249 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -41,11 +41,13 @@ def widen_columns( } substitutions: dict[RelationalExpression, RelationalExpression] = {} for input_idx in range(len(node.inputs)): + input_alias: str | None = node.default_input_aliases[input_idx] input_node: RelationalNode = node.inputs[input_idx] for name, expr in input_node.columns.items(): - expr = add_input_name(expr, node.default_input_aliases[input_idx]) + if isinstance(node, Join): + expr = add_input_name(expr, input_alias) ref_expr: ColumnReference = ColumnReference( - name, expr.data_type, input_name=node.default_input_aliases[input_idx] + name, expr.data_type, input_name=input_alias ) if expr not in existing_vals: new_name: str = name diff --git a/pydough/sqlglot/sqlglot_relational_visitor.py b/pydough/sqlglot/sqlglot_relational_visitor.py index e0b4b1475..5c3572dfd 100644 --- a/pydough/sqlglot/sqlglot_relational_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_visitor.py @@ -39,7 +39,7 @@ ) from .sqlglot_helpers import get_glot_name, set_glot_alias, unwrap_alias -from .sqlglot_identifier_finder import find_identifiers, 
find_identifiers_in_list +from .sqlglot_identifier_finder import find_identifiers_in_list from .sqlglot_relational_expression_visitor import SQLGlotRelationalExpressionVisitor __all__ = ["SQLGlotRelationalVisitor"] @@ -455,23 +455,7 @@ def visit_filter(self, filter: Filter) -> None: # QUALIFY. query = self._build_subquery(query, exprs) else: - # TODO: (gh #151) Refactor a simpler way to check dependent expressions. - if ( - "group" in input_expr.args - or "distinct" in input_expr.args - or "where" in input_expr.args - or "qualify" in input_expr.args - or "order" in input_expr.args - or "limit" in input_expr.args - ): - # Check if we already have a where clause or limit. We - # cannot merge these yet. - # TODO: (gh #151) Consider allowing combining where if - # limit isn't present? - query = self._build_subquery(input_expr, exprs) - else: - # Try merge the column sections - query = self._merge_selects(exprs, input_expr, find_identifiers(cond)) + query = self._build_subquery(input_expr, exprs) query = query.where(cond) self._stack.append(query) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 8fa56b648..5a6627a0f 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,7 +1,8 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': 
o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + PROJECT(columns={'o_orderkey_1': o_orderkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 1f5ca48a2..e1c4d902b 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], 
partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 1f5ca48a2..e1c4d902b 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': 
RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 2e01acf9c..2371e5b10 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,16 +1,17 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', qty_shipped)], orderings=[(s_name):asc_first]) PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric), 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == 
t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': 
ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 11b514f75..12f4fa377 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -3,8 +3,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discou LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': n_orders}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': 
n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 23c6a811a..126aa69c8 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': 
DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 9f93ea84e..e8d037b92 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,13 +1,15 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t1.s_name_1}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 's_name_1': s_name, 's_nationkey_1': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': 
t0.ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 4d1bb2447..6fdd01f9f 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -3,11 +3,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 
'n_rows_1': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 34e0d2986..2c6a310e8 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -5,14 +5,15 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': 
SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 078ad6f20..79a8159e7 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -5,19 +5,26 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 
'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': 
t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index dd8ca64e5..20bfbcac7 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -3,11 +3,12 @@ ROOT(columns=[('name', 
r_name), ('n_customers', n_customers), ('n_nations', sum_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t1.sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 848a95bb5..4629f2fa8 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -3,11 +3,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) 
AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index eb6de35e5..114ac4f25 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -5,14 +5,15 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders) PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index a338a9b8e..db722c70a 100644 --- 
a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,7 +1,8 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 4c3a0abf6..28f7e96d9 100644 --- a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,7 +1,8 @@ ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) 
LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 407a9894e..5c8942e45 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,19 +1,20 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum)], orderings=[(c_name):asc_first]) 
PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 
'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': 
r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 65b095b33..007c85b0b 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,19 +1,21 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum), ('nation_name', n_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': n_selected_suppliers, 
'selected_suppliers_avg': selected_suppliers_avg, 'selected_suppliers_sum': selected_suppliers_sum}, orderings=[(c_name):asc_first]) PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 
'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == 
t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 03557163c..4a9dff08f 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,8 +1,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), 
('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': 
n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) @@ -11,13 +11,14 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) + PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': 
p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 4678a87a4..78527ac1d 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,12 +1,13 @@ ROOT(columns=[('name', c_name), ('n_orders', n_orders), ('n_parts_ordered', n_parts_ordered), ('n_distinct_parts', n_distinct_parts)], orderings=[(ordering_3):asc_first, (c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_distinct_parts': n_distinct_parts, 'n_orders': n_orders, 'n_parts_ordered': n_parts_ordered, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) PROJECT(columns={'c_name': c_name, 'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows_1, 0:numeric), 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': 
o_orderpriority}) + JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_name_1': c_name, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 9d8b97da5..dec2085d2 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) - PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': 
DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) + PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 2c10e35a7..a1733f226 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,8 +1,9 @@ ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name_1}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 4b633dbd0..885853056 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,9 +1,10 @@ ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_name': t1.n_name}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + PROJECT(columns={'c_custkey_1': c_custkey, 'n_name_1': n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index feffdfb1a..d0f332474 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -2,16 +2,17 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': 
t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 6d7af3f9c..53149e564 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -2,19 +2,21 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, 
columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': 
p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + 
PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 0887475a3..a480ec3e3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,15 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey}) - FILTER(condition=n_name == n_name, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, 
type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index e7c7cde56..7aefdb4d0 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -3,16 +3,17 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 
'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'o_orderkey_1': o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index ac9583af3..4ef8b131d 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -4,15 +4,16 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) 
AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': 
n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 3f6839aaf..1ee36d030 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -4,13 +4,14 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, 
columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 29bf6bb22..9c6d69d8f 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,21 +1,22 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_above_avg_customers': 
DEFAULT_TO(n_rows, 0:numeric), 'n_above_avg_suppliers': DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 
'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 
'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 9f4248442..761246fb9 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -3,17 +3,18 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': 
c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal_1, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + PROJECT(columns={'avg_cust_acctbal_1': avg_cust_acctbal, 'n_name_1': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + 
SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index c868fedf4..1cfd16b15 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,17 +1,18 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 4ee636ab6..1bbb483bf 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -5,14 +5,15 @@ ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 's_suppkey_1': s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=l_linestatus == 'F':string & l_returnflag == 'N':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index 21bb4c0e6..edd89b822 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -7,12 +7,13 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 
'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index a7a2bb160..ef7fa9411 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,15 +1,19 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', event_year), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': YEAR(ev_dt), 's_name': s_name, 't_name': t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, 
columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + PROJECT(columns={'ev_key': ev_key, 's_name_1': s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 
's_month3': s_month3, 's_name': s_name}) + PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index c9f1900fe..179adc4d0 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,12 +1,13 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_orderkey_1 == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name_1}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey_1': l_orderkey, 'l_shipdate': l_shipdate, 'n_name_1': n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index e06f975c8..6f090bc1b 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,14 +1,16 @@ ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_partkey_1 == t1.ps_partkey & t0.l_suppkey_1 == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name_1, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1, 'supplier_region_name': t1.r_name}) + PROJECT(columns={'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'o_orderdate': o_orderdate, 'r_name_1': r_name}) + JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1}) + PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey, 'r_name_1': r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index d185fd519..a57480e07 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,11 +1,12 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name_1, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) + PROJECT(columns={'n_name_1': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) + PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 
's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index bca9f137e..86f173ea6 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans_1, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) + PROJECT(columns={'n_ticker_trans_1': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) @@ -10,8 +10,8 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, 
aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index caee91153..738a980a1 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': 
t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index 7706c56b5..21297f633 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,9 +1,10 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) + PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': 
t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 223d5aab8..734ac6e6f 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -5,10 +5,10 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + FILTER(condition=RANKING(args=[], 
partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name_1, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows_1': n_rows, 'p_name_1': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey_1, 'sum_l_quantity': t0.sum_l_quantity}) + PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey_1': ps_partkey, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows_1': n_rows, 'sum_l_quantity': sum_l_quantity}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index acb036880..ae75e6956 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -9,8 +9,9 @@ ROOT(columns=[('year', release_year), ('ir', ir)], orderings=[(release_year):asc SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': 
t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + PROJECT(columns={'de_id_1': de_id, 'pr_release': pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 6a96f0f3d..295a255e1 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -2,21 +2,22 @@ ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) PROJECT(columns={'ir': ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric), 'month': month, 'month_0': JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_3': expr_3, 'month': MONTH(ca_dt), 'n_rows': n_rows, 'year': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), 
columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows, 'year_1': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 
'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 191147ea3..7a7c31f77 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -8,18 +8,19 @@ ROOT(columns=[('years_since_release', years_since_release), ('cum_ir', cum_ir), SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == 
DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index f4222569e..f3186c8d6 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -5,13 +5,14 @@ ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, 
columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index f92f003ec..a77131552 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,14 +1,15 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': 
c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + JOIN(condition=t0.c_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal_1, 'c_address': t0.c_address_1, 'c_comment': t0.c_comment_1, 'c_custkey': t0.c_custkey_1, 'c_name': t0.c_name_1, 'c_phone': t0.c_phone_1, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) + PROJECT(columns={'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'c_nationkey_1': c_nationkey, 'c_phone_1': c_phone, 'sum_expr_1': sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index e81f6331d..de30994c3 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,7 +1,7 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) + FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey_1}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey_1': ps_partkey, 'sum_metric': sum_metric}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) PROJECT(columns={'metric': ps_supplycost * ps_availqty}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 8de1daa53..d1bcc2be8 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,7 +1,7 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, type=NumericType()), 
columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey_1, 'c_name': c_name_1, 'o_orderdate': o_orderdate_1, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice_1}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'o_orderdate_1': o_orderdate, 'o_orderkey_1': o_orderkey, 'o_totalprice_1': o_totalprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index bbad37875..06c5ad1a2 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -6,9 +6,10 @@ 
ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, 
columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 81644d8d2..cce78a05f 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -8,11 +8,12 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.p_partkey == 
t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 59ff61eef..d7cf1c363 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -2,18 +2,19 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(RE PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 28100ecde..c436e164f 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,8 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - 
FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + PROJECT(columns={'o_totalprice_1': o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_relational_nodes_to_sqlglot.py b/tests/test_relational_nodes_to_sqlglot.py index c992246ca..be5c9f52f 100644 --- a/tests/test_relational_nodes_to_sqlglot.py +++ b/tests/test_relational_nodes_to_sqlglot.py @@ -332,7 +332,15 @@ def mkglot_func(op: type[Expression], args: list[Expression]) -> Expression: Ident(this="a", quoted=False), Ident(this="b", quoted=False), ], - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + ) + ), where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), @@ -379,7 +387,17 @@ def mkglot_func(op: type[Expression], args: list[Expression]) -> Expression: Ident(this="a", quoted=False), Ident(this="b", 
quoted=False), ], - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom( + Table(this=Ident(this="table", quoted=False)) + ), + ) + ), where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), @@ -1419,7 +1437,15 @@ def mkglot_func(op: type[Expression], args: list[Expression]) -> Expression: where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + ) + ), ), id="root_after_filter", ), @@ -1781,7 +1807,15 @@ def test_expression_identifiers(expr: Expression, expected: set[Ident]) -> None: where=mkglot_func( EQ, [Ident(this="a", quoted=False), mk_literal(1, False)] ), - _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + _from=GlotFrom( + mkglot( + expressions=[ + Ident(this="a", quoted=False), + Ident(this="b", quoted=False), + ], + _from=GlotFrom(Table(this=Ident(this="table", quoted=False))), + ) + ), ), id="root_after_filter", ), diff --git a/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql b/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql index 322ee79e7..d2b5d7fcf 100644 --- a/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql +++ b/tests/test_sql_refsols/func_rank_with_filters_a_ansi.sql @@ -1,16 +1,14 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT RANK() OVER (ORDER BY a) AS r, a, b FROM table - WHERE - b = 0 ) SELECT a, b, r -FROM _t0 +FROM _t1 WHERE - r >= 3 + b = 0 AND r >= 3 diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 593db1ec1..211f66449 100644 --- 
a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -4,7 +4,7 @@ WITH _t5 AS ( FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t7 AS ( +), _t8 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t5 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _s0.ca_dt - FROM _t5 AS _s0 + _t7.ca_dt + FROM _t5 AS _t7 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_s0.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t7.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t8 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _s0.ca_dt + _t7.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _s8.ca_dt - FROM _t5 AS _s8 + _t10.ca_dt + FROM _t5 AS _t10 JOIN main.incidents AS incidents - ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t10.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t8 AS _t11 + ON _t11.co_id = devices.de_production_country_id GROUP BY - _s8.ca_dt + _t10.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index 581509700..e896b36ac 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -4,7 +4,7 @@ WITH _t5 AS ( FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t7 AS ( +), _t8 AS ( SELECT co_id, co_name @@ -14,29 +14,29 @@ WITH _t5 AS ( ), _s7 AS ( 
SELECT COUNT(*) AS n_rows, - _s0.ca_dt - FROM _t5 AS _s0 + _t7.ca_dt + FROM _t5 AS _t7 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_s0.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_t7.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t8 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _s0.ca_dt + _t7.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _s8.ca_dt - FROM _t5 AS _s8 + _t10.ca_dt + FROM _t5 AS _t10 JOIN main.incidents AS incidents - ON _s8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t10.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t8 AS _t11 + ON _t11.co_id = devices.de_production_country_id GROUP BY - _s8.ca_dt + _t10.ca_dt ) SELECT CONCAT_WS( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index 712284e0c..292b7386e 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t8 AS ( +), _t9 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t8 - ON _t8.pr_id = devices.de_product_id + JOIN _t9 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', 
CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t8 AS _t10 - ON _t10.pr_id = devices.de_product_id + JOIN _t9 AS _t11 + ON _t11.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index e49cda5d1..4becabea9 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t8 AS ( +), _t9 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t8 - ON _t8.pr_id = devices.de_product_id + JOIN _t9 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t8 AS _t10 - ON _t10.pr_id = devices.de_product_id + JOIN _t9 AS _t11 + ON _t11.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 64803c51d..52747c6e5 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey, + part.p_partkey AS p_partkey_1, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey + ON _s5.p_partkey_1 = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) diff 
--git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index e5b221d69..c0f053dfc 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey, + part.p_partkey AS p_partkey_1, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey = partsupp.ps_partkey + ON _s5.p_partkey_1 = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) From 07136d2a6838758bf947857abb8ee770c4c78f62 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 12 Jul 2025 21:09:28 -0400 Subject: [PATCH 11/97] Pullup with LIMIT [RUN CI] --- pydough/conversion/merge_projects.py | 50 ++++++++--- pydough/conversion/projection_pullup.py | 55 +++++++++++- tests/test_plan_refsols/bad_child_reuse_1.txt | 8 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 17 ++-- tests/test_plan_refsols/bad_child_reuse_3.txt | 17 ++-- tests/test_plan_refsols/bad_child_reuse_4.txt | 19 ++-- tests/test_plan_refsols/bad_child_reuse_5.txt | 8 +- tests/test_plan_refsols/common_prefix_ad.txt | 32 ++++--- tests/test_plan_refsols/common_prefix_ag.txt | 89 +++++++++---------- tests/test_plan_refsols/common_prefix_ah.txt | 71 ++++++++------- tests/test_plan_refsols/common_prefix_ai.txt | 65 +++++++------- tests/test_plan_refsols/common_prefix_aj.txt | 89 +++++++++---------- tests/test_plan_refsols/common_prefix_ak.txt | 87 +++++++++--------- tests/test_plan_refsols/common_prefix_al.txt | 35 ++++---- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_an.txt | 45 +++++----- tests/test_plan_refsols/common_prefix_ao.txt | 55 ++++++------ tests/test_plan_refsols/common_prefix_c.txt | 45 +++++----- tests/test_plan_refsols/common_prefix_d.txt | 59 ++++++------ 
tests/test_plan_refsols/common_prefix_h.txt | 45 +++++----- tests/test_plan_refsols/common_prefix_i.txt | 23 +++-- tests/test_plan_refsols/common_prefix_l.txt | 39 ++++---- tests/test_plan_refsols/common_prefix_m.txt | 41 +++++---- tests/test_plan_refsols/common_prefix_n.txt | 49 +++++----- tests/test_plan_refsols/common_prefix_o.txt | 55 ++++++------ tests/test_plan_refsols/common_prefix_p.txt | 6 +- tests/test_plan_refsols/common_prefix_q.txt | 4 +- tests/test_plan_refsols/common_prefix_r.txt | 4 +- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/correl_1.txt | 15 ++-- tests/test_plan_refsols/correl_10.txt | 13 ++- tests/test_plan_refsols/correl_14.txt | 32 +++---- tests/test_plan_refsols/correl_15.txt | 40 +++++---- tests/test_plan_refsols/correl_17.txt | 9 +- tests/test_plan_refsols/correl_18.txt | 17 ++-- tests/test_plan_refsols/correl_2.txt | 27 +++--- tests/test_plan_refsols/correl_20.txt | 26 +++--- tests/test_plan_refsols/correl_24.txt | 14 ++- tests/test_plan_refsols/correl_29.txt | 59 ++++++------ tests/test_plan_refsols/correl_3.txt | 21 +++-- tests/test_plan_refsols/correl_32.txt | 4 +- .../cumulative_stock_analysis.txt | 12 ++- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 4 +- tests/test_plan_refsols/datetime_relative.txt | 7 +- .../test_plan_refsols/deep_best_analysis.txt | 4 +- tests/test_plan_refsols/dumb_aggregation.txt | 11 ++- .../epoch_culture_events_info.txt | 37 ++++---- .../epoch_intra_season_searches.txt | 49 +++++----- .../epoch_pct_searches_per_tod.txt | 11 ++- .../epoch_search_results_by_tod.txt | 11 ++- .../epoch_unique_users_per_engine.txt | 19 ++-- tests/test_plan_refsols/exponentiation.txt | 6 +- tests/test_plan_refsols/floor_and_ceil_2.txt | 6 +- tests/test_plan_refsols/function_sampler.txt | 17 ++-- tests/test_plan_refsols/hour_minute_day.txt | 11 ++- .../minutes_seconds_datediff.txt | 9 +- .../month_year_sliding_windows.txt | 29 +++--- 
.../mostly_positive_accounts_per_nation3.txt | 4 +- .../multi_partition_access_5.txt | 8 +- .../multi_partition_access_6.txt | 53 +++++------ tests/test_plan_refsols/nation_best_order.txt | 4 +- .../test_plan_refsols/nation_window_aggs.txt | 7 +- .../test_plan_refsols/order_quarter_test.txt | 9 +- .../ordering_name_overload.txt | 5 +- .../orders_versus_first_orders.txt | 4 +- tests/test_plan_refsols/padding_functions.txt | 7 +- tests/test_plan_refsols/part_reduced_size.txt | 14 +-- .../parts_quantity_increase_95_96.txt | 6 +- tests/test_plan_refsols/prev_next_regions.txt | 5 +- .../rank_nations_per_region_by_customers.txt | 4 +- ...rank_parts_per_supplier_region_by_size.txt | 4 +- .../region_nation_window_aggs.txt | 11 ++- .../region_orders_from_nations_richest.txt | 21 +++-- tests/test_plan_refsols/sign.txt | 8 +- tests/test_plan_refsols/simple_cross_10.txt | 21 +++-- tests/test_plan_refsols/simple_cross_4.txt | 17 ++-- tests/test_plan_refsols/singular4.txt | 15 ++-- tests/test_plan_refsols/singular7.txt | 29 +++--- .../test_plan_refsols/supplier_best_part.txt | 4 +- .../supplier_pct_national_qty.txt | 6 +- .../test_plan_refsols/suppliers_bal_diffs.txt | 4 +- ...chnograph_country_combination_analysis.txt | 4 +- ...nograph_country_incident_rate_analysis.txt | 37 ++++---- ...aph_error_percentages_sun_set_by_error.txt | 21 +++-- ..._error_rate_sun_set_by_factory_country.txt | 25 +++--- ...hnograph_incident_rate_by_release_year.txt | 33 ++++--- .../technograph_incident_rate_per_brand.txt | 19 ++-- .../technograph_monthly_incident_rate.txt | 56 ++++++------ .../technograph_most_unreliable_products.txt | 4 +- ...umulative_incident_rate_goldcopperstar.txt | 50 +++++------ ..._year_cumulative_incident_rate_overall.txt | 32 ++++--- .../time_threshold_reached.txt | 4 +- ...top_5_nations_balance_by_num_suppliers.txt | 13 ++- .../test_plan_refsols/topk_order_by_calc.txt | 7 +- tests/test_plan_refsols/tpch_q1.txt | 11 ++- tests/test_plan_refsols/tpch_q10.txt | 4 +- 
tests/test_plan_refsols/tpch_q11.txt | 4 +- tests/test_plan_refsols/tpch_q12.txt | 15 ++-- tests/test_plan_refsols/tpch_q15.txt | 34 ++++--- tests/test_plan_refsols/tpch_q18.txt | 4 +- tests/test_plan_refsols/tpch_q20.txt | 21 ++--- tests/test_plan_refsols/tpch_q22.txt | 31 +++---- tests/test_plan_refsols/tpch_q5.txt | 39 ++++---- tests/test_plan_refsols/tpch_q7.txt | 31 ++++--- tests/test_plan_refsols/tpch_q9.txt | 33 ++++--- .../window_filter_order_10.txt | 4 +- .../window_sliding_frame_relsize.txt | 4 +- .../window_sliding_frame_relsum.txt | 4 +- .../years_months_days_hours_datediff.txt | 6 +- .../yoy_change_in_num_orders.txt | 9 +- .../cumulative_stock_analysis_ansi.sql | 70 +++++++-------- .../cumulative_stock_analysis_sqlite.sql | 70 +++++++-------- tests/test_sql_refsols/datediff_ansi.sql | 20 +++-- tests/test_sql_refsols/datediff_sqlite.sql | 20 +++-- .../defog_broker_adv3_ansi.sql | 4 +- .../defog_broker_adv3_sqlite.sql | 4 +- .../defog_broker_adv5_ansi.sql | 11 ++- .../defog_broker_adv5_sqlite.sql | 11 ++- .../defog_broker_basic3_ansi.sql | 24 +++-- .../defog_broker_basic3_sqlite.sql | 24 +++-- .../defog_dealership_adv5_ansi.sql | 26 ++---- .../defog_dealership_adv5_sqlite.sql | 26 ++---- .../defog_dealership_basic10_ansi.sql | 27 ++++-- .../defog_dealership_basic10_sqlite.sql | 27 ++++-- .../defog_dealership_basic5_ansi.sql | 27 ++++-- .../defog_dealership_basic5_sqlite.sql | 27 ++++-- .../defog_dealership_basic8_ansi.sql | 27 ++++-- .../defog_dealership_basic8_sqlite.sql | 27 ++++-- .../defog_ewallet_adv11_ansi.sql | 2 +- .../defog_ewallet_adv11_sqlite.sql | 2 +- .../defog_ewallet_basic10_ansi.sql | 24 +++-- .../defog_ewallet_basic10_sqlite.sql | 24 +++-- .../defog_ewallet_basic8_ansi.sql | 24 +++-- .../defog_ewallet_basic8_sqlite.sql | 24 +++-- .../epoch_culture_events_info_ansi.sql | 58 +++++++----- .../epoch_culture_events_info_sqlite.sql | 58 +++++++----- .../epoch_pct_searches_per_tod_ansi.sql | 14 +-- .../epoch_pct_searches_per_tod_sqlite.sql | 14 
+-- .../epoch_search_results_by_tod_ansi.sql | 17 ++-- .../epoch_search_results_by_tod_sqlite.sql | 17 ++-- .../floor_and_ceil_2_ansi.sql | 18 +++- .../floor_and_ceil_2_sqlite.sql | 48 +++++++--- ...ph_country_incident_rate_analysis_ansi.sql | 8 +- ..._country_incident_rate_analysis_sqlite.sql | 8 +- ...rror_percentages_sun_set_by_error_ansi.sql | 23 +++-- ...or_percentages_sun_set_by_error_sqlite.sql | 32 +++---- ...technograph_monthly_incident_rate_ansi.sql | 38 ++++---- ...chnograph_monthly_incident_rate_sqlite.sql | 38 ++++---- ...tive_incident_rate_goldcopperstar_ansi.sql | 73 +++++++-------- ...ve_incident_rate_goldcopperstar_sqlite.sql | 73 +++++++-------- ..._cumulative_incident_rate_overall_ansi.sql | 61 ++++++------- ...umulative_incident_rate_overall_sqlite.sql | 61 ++++++------- .../time_threshold_reached_ansi.sql | 6 +- .../time_threshold_reached_sqlite.sql | 6 +- tests/test_sql_refsols/tpch_q11_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q11_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q15_ansi.sql | 11 +-- tests/test_sql_refsols/tpch_q15_sqlite.sql | 11 +-- tests/test_sql_refsols/tpch_q5_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q5_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q9_ansi.sql | 55 +++++++----- tests/test_sql_refsols/tpch_q9_sqlite.sql | 55 +++++++----- 163 files changed, 1985 insertions(+), 1879 deletions(-) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index 7a22b2cad..ca2d3f4d1 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -189,18 +189,44 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: while isinstance(node.input, Project): child_project: Project = node.input if isinstance(node, RelationalRoot): - # The columns of the projection can be sucked into the root - # above it if they are all pass-through/renamings, or if there - # is no convolution created (only allowed if there are no - # ordering expressions). 
- if all( - isinstance(expr, ColumnReference) - for expr in child_project.columns.values() - ) or ( - len(node.orderings) == 0 - and merging_doesnt_create_convolution( - node.columns, child_project.columns - ) + # # The columns of the projection can be sucked into the root + # # above it if they are all pass-through/renamings, or if there + # # is no convolution created (only allowed if there are no + # # ordering expressions). + # if all( + # isinstance(expr, ColumnReference) + # for expr in child_project.columns.values() + # ) or ( + # len(node.orderings) == 0 + # and merging_doesnt_create_convolution( + # node.columns, child_project.columns + # ) + # ): + # # Replace all column references in the root's columns with + # # the expressions from the child projection.. + # for idx, (name, expr) in enumerate(node.ordered_columns): + # new_expr = transpose_expression(expr, child_project.columns) + # node.columns[name] = new_expr + # node.ordered_columns[idx] = (name, new_expr) + # # Do the same with the sort expressions. + # for idx, sort_info in enumerate(node.orderings): + # new_expr = transpose_expression( + # sort_info.expr, child_project.columns + # ) + # node.orderings[idx] = ExpressionSortInfo( + # new_expr, sort_info.ascending, sort_info.nulls_first + # ) + # # Delete the child projection from the tree, replacing it + # # with its input. + # node._input = child_project.input + # else: + # # Otherwise, halt the merging process since it is no longer + # # possible to merge the children of this root into it. + # break + # TODO: ADD COMMENTS + if not ( + any(contains_window(expr) for expr in child_project.columns.values()) + and any(contains_window(expr) for expr in node.columns.values()) ): # Replace all column references in the root's columns with # the expressions from the child projection.. 
diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 5a575c249..768944058 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -9,9 +9,11 @@ from pydough.relational import ( ColumnReference, + ExpressionSortInfo, Filter, Join, JoinType, + Limit, Project, RelationalExpression, RelationalNode, @@ -178,6 +180,55 @@ def pull_project_into_filter(node: Filter) -> None: } +def pull_project_into_limit(node: Limit) -> None: + """ + TODO + """ + if not isinstance(node.input, Project): + return + + project: Project = node.input + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + for expr in node.columns.values(): + expr.accept(finder) + output_cols: set[ColumnReference] = finder.get_column_references() + output_names: set[str] = {col.name for col in output_cols} + + finder.reset() + for order_expr in node.orderings: + order_expr.expr.accept(finder) + order_cols: set[ColumnReference] = finder.get_column_references() + order_names: set[str] = {col.name for col in order_cols} + + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + for name, expr in project.columns.items(): + new_expr: RelationalExpression = apply_substitution( + expr, transfer_substitutions, {} + ) + if (not contains_window(new_expr)) and ( + (name in output_names) != (name in order_names) + ): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type) + substitutions[ref_expr] = new_expr + node._columns = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.columns.items() + } + node._orderings = [ + ExpressionSortInfo( + apply_substitution(order_expr.expr, substitutions, {}), + order_expr.ascending, + order_expr.nulls_first, + ) + for order_expr in node.orderings + ] + + def pullup_projections(node: RelationalNode) -> 
RelationalNode: """ TODO @@ -192,8 +243,8 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: if node.join_type == JoinType.INNER: pull_project_into_join(node, 1) return pull_non_columns(node) - case Filter(): - pull_project_into_filter(node) + case Limit(): + pull_project_into_limit(node) return pull_non_columns(node) case _: return node diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index c9a72f211..e5ed44130 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders)], orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': n_orders}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) +ROOT(columns=[('cust_key', c_custkey_1), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey_1, 'n_rows': n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows_1, 'n_rows_1': n_rows}, orderings=[(c_acctbal):desc_last]) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index e1c4d902b..128fe3cf1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,11 +1,10 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], 
order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index e1c4d902b..128fe3cf1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,11 +1,10 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index 1205908a2..0bcaee7d1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders)], orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 
0:numeric)}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': 
t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index fbf66f368..fe46e16ce 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders)], orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey_1, 'n_rows': n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 2371e5b10..b4202ffb5 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', qty_shipped)], orderings=[(s_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'qty_shipped': DEFAULT_TO(sum_l_quantity, 0:numeric), 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': 
t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], 
order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=DAY(l_shipdate) < 4:numeric & MONTH(l_shipdate) == 2:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 425e256b7..a2ecdb1b4 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,45 +1,44 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 
0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': 
t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': 
o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == 
t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index dc6fb7436..e5ee9d7a2 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,36 +1,35 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_high_domestic_lines': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(n_rows, 0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == 
t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': 
o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, 
cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + 
FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index f1eba1ba7..d544d0cf7 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,33 +1,32 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_n_rows, 0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': 
SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 
'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - 
SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 40885cbd1..b6ea23a97 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,45 +1,44 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('total_machine_high_domestic_revenue', total_machine_high_domestic_revenue)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric), 'total_machine_high_domestic_revenue': ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) 
- SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 
'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': 
t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': 
t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 694228447..036dab140 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,44 +1,43 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': 
ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 
0:numeric))], orderings=[(anything_n_name):asc_first]) + FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + 
SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 12f4fa377..c20c51d81 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_no_tax_discount': t0.n_no_tax_discount, 'n_orders': t0.n_orders}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': n_orders}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders_1, 'n_rows': t0.n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders_1': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': 
o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) FILTER(condition=n_rows > 0:numeric, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 126aa69c8..23c6a811a 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey_1], order=[]), columns={'c_custkey': c_custkey_1, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) + FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) + 
PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 6f23c6a66..6d114b0ce 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount)], orderings=[(c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': agg_1, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=Literal(value=50, type=NumericType()), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, 
columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=Literal(value=50, type=NumericType()), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + 
FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 20ba145d3..4ac379581 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,28 +1,27 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_no_tax_discount), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_custkey': c_custkey, 'n_no_tax_discount': n_no_tax_discount, 'n_orders': n_orders, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_no_tax_discount': DEFAULT_TO(n_rows, 0:numeric), 'n_orders': DEFAULT_TO(agg_1, 0:numeric), 'sum_n_rows': sum_n_rows}) - FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - 
LIMIT(limit=Literal(value=35, type=NumericType()), columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + LIMIT(limit=Literal(value=20, type=NumericType()), columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=Literal(value=35, type=NumericType()), columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': 
n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 2c6a310e8..34dc71b2e 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), 
('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_nations': n_nations, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': 
sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + 
PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 79a8159e7..cb55fd26f 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,30 +1,29 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', n_orders_94), ('n_orders_95', n_orders_95), ('n_orders_96', n_orders_96)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_nations': n_nations, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_suppliers': n_suppliers, 'r_name': r_name, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 
'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 
'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 114ac4f25..1a8005c46 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_nations': n_nations, 'n_orders': 
DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'r_name': r_name, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, 
aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': 
sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': 
n_rows, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 22743c469..5056ae384 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', n_selected_orders)], orderings=[(n_rows):desc_last, (n_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': n_selected_orders}, orderings=[(n_rows):desc_last, (n_name):asc_first]) - PROJECT(columns={'n_name': n_name, 'n_rows': n_rows, 'n_selected_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONTH(o_orderdate) == 12:numeric & 
YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) +ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(n_rows):desc_last, (n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 5c8942e45..d8911515c 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,20 +1,19 @@ -ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', n_selected_suppliers), 
('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum)], orderings=[(c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 007c85b0b..24dfe2447 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', n_selected_suppliers), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', selected_suppliers_avg), ('selected_suppliers_sum', selected_suppliers_sum), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': n_selected_suppliers, 'selected_suppliers_avg': selected_suppliers_avg, 'selected_suppliers_sum': selected_suppliers_sum}, orderings=[(c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_selected_suppliers': DEFAULT_TO(n_rows, 0:numeric), 'selected_suppliers_avg': ROUND(avg_s_acctbal, 2:numeric), 'selected_suppliers_sum': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.CUSTOMER, 
columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal_1), ('selected_suppliers_max', max_s_acctbal_1), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name_1)], orderings=[(c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 
'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + 
FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 01c51b8e2..71e91f37d 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,27 +1,26 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': 
sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': 
l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 
'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 4a9dff08f..14fc1ed20 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,30 +1,29 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - 
FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) - PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': 
p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) + PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': 
l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 78527ac1d..9b9ef4933 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('name', c_name), ('n_orders', n_orders), ('n_parts_ordered', n_parts_ordered), ('n_distinct_parts', n_distinct_parts)], orderings=[(ordering_3):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_distinct_parts': n_distinct_parts, 
'n_orders': n_orders, 'n_parts_ordered': n_parts_ordered, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_distinct_parts': DEFAULT_TO(ndistinct_l_partkey, 0:numeric), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_parts_ordered': DEFAULT_TO(n_rows_1, 0:numeric), 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3_1):asc_first, (c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3_1': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) PROJECT(columns={'c_custkey_1': c_custkey, 'c_name_1': c_name, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index e7f6f5fd0..634dd4f69 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_spent', total_spent), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], 
orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice_1, 'max_p_name': max_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_l_extendedprice_1': max_l_extendedprice, 'max_p_name_1': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 355dc9ad3..32ebc4101 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': 
max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice_1, 'max_anything_p_name': max_anything_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice_1': max_anything_anything_l_extendedprice, 'max_anything_p_name_1': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index dec2085d2..9d8b97da5 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, 
columns={'c_name': c_name_1, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate_1}) - PROJECT(columns={'c_name_1': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate_1': o_orderdate}) + FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) + PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index d77e7b9ff..a96b6f6c4 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('region_name', r_name), ('n_prefix_nations', n_prefix_nations)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_prefix_nations': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'r_regionkey': 
r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index a2954ac48..6420d31d8 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('name', n_name), ('rname', NULL_4)], orderings=[(n_name):asc_first]) - PROJECT(columns={'NULL_4': None:unknown, 'n_name': n_name}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 
1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', n_name), ('rname', None:unknown)], orderings=[(n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index d0f332474..3e1783235 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,18 +1,20 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': 
sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) + PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric, 
columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + 
FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 53149e564..c94a329b1 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,22 +1,24 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 
'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) + PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + 
JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 20bcf38c9..9fb817735 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('fullname', fname)], orderings=[(fname):asc_first]) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('fullname', JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name)))], orderings=[(JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))):asc_first]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 5f6ca684d..e34eb6923 100644 --- 
a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,11 +1,12 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': SUM(n_above_avg)}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=o_totalprice >= 0.5:numeric * total_price_sum, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + PROJECT(columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'total_price_sum': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, 
columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 38a14ad9b..8b243cb8b 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('name', n_name), ('n_selected_custs', n_selected_custs)], orderings=[(n_name):asc_first]) - PROJECT(columns={'n_name': n_name, 'n_selected_custs': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) - FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) +ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) + FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 1:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, 
None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index a480ec3e3..8c61c789f 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,14 +1,16 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 6:numeric & 
YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=domestic, columns={}) + PROJECT(columns={'domestic': name_16 == n_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'name_16': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff 
--git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index d179a88b7..eb1ec52c0 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -4,11 +4,9 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice_1, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice_1': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': 
o_totalprice}) + FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 9c6d69d8f..0c9acaa5b 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,30 +1,29 @@ -ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_above_avg_customers), ('n_above_avg_suppliers', n_above_avg_suppliers), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'max_c_acctbal': max_c_acctbal, 'min_c_acctbal': min_c_acctbal, 'n_above_avg_customers': DEFAULT_TO(n_rows, 0:numeric), 'n_above_avg_suppliers': DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': 
t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) +ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', DEFAULT_TO(n_rows, 0:numeric)), ('n_above_avg_suppliers', DEFAULT_TO(n_rows_1, 0:numeric)), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) + JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': 
anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index 92aafe048..97b696fec 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('n_nations', n_nations)], orderings=[(r_name):asc_first]) - 
PROJECT(columns={'n_nations': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.anything_r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'anything_r_regionkey': anything_r_regionkey}, aggregations={'n_rows': COUNT()}) - AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_r_regionkey': ANYTHING(r_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) +ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.anything_r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'anything_r_regionkey': anything_r_regionkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_r_regionkey': ANYTHING(r_regionkey)}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 
2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index e1c8d129c..a88f51fdb 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', anything_c_name), ('delta', delta)], orderings=[(delta):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'delta': delta}, orderings=[(delta):asc_first]) - PROJECT(columns={'anything_c_name': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name_1, 'delta': delta}, orderings=[(delta):asc_first]) + PROJECT(columns={'anything_c_name_1': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.n_regionkey == 
t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/cumulative_stock_analysis.txt b/tests/test_plan_refsols/cumulative_stock_analysis.txt index f0c0d24b5..79ad18038 100644 --- a/tests/test_plan_refsols/cumulative_stock_analysis.txt +++ b/tests/test_plan_refsols/cumulative_stock_analysis.txt @@ -1,7 +1,5 @@ -ROOT(columns=[('date_time', sbTxDateTime), ('txn_within_day', txn_within_day), ('n_buys_within_day', n_buys_within_day), ('pct_apple_txns', pct_apple_txns), ('share_change', share_change), ('rolling_avg_amount', rolling_avg_amount)], orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'n_buys_within_day': RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True), 'pct_apple_txns': ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'rolling_avg_amount': ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric), 'sbTxDateTime': sbTxDateTime, 'share_change': RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 'txn_within_day': RELSIZE(args=[], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True)}) - PROJECT(columns={'sbTickerSymbol': sbTickerSymbol, 'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxType': sbTxType, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 
'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) - FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) +ROOT(columns=[('date_time', sbTxDateTime), ('txn_within_day', RELSIZE(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('n_buys_within_day', RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('pct_apple_txns', ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric)), ('share_change', RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True)), ('rolling_avg_amount', ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric))], orderings=[(sbTxDateTime):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) + FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & 
YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 1fa05bd28..b7b448977 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) + PROJECT(columns={'c_name_1': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 52700565c..a6322fc0c 100644 --- 
a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], orderings=[(total_recent_value):desc_last]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name_1, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) + PROJECT(columns={'c_name_1': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/datetime_relative.txt b/tests/test_plan_refsols/datetime_relative.txt index 995e3bd66..ddd91f4af 100644 --- a/tests/test_plan_refsols/datetime_relative.txt +++ b/tests/test_plan_refsols/datetime_relative.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('d1', d1), ('d2', d2), ('d3', d3), ('d4', d4), ('d5', d5), ('d6', d6)], orderings=[(o_orderdate):asc_first]) - PROJECT(columns={'d1': DATETIME(o_orderdate, 'Start of Year':string), 'd2': DATETIME(o_orderdate, 'START OF MONTHS':string), 'd3': DATETIME(o_orderdate, '-11 years':string, '+9 months':string, ' - 7 DaYs ':string, '+5 h':string, '-3 minutes':string, '+1 second':string), 'd4': DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of hour':string), 'd5': DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of minute':string), 'd6': DATETIME(Timestamp('2025-07-14 
12:58:45'):datetime, '+ 1000000 seconds':string), 'o_orderdate': o_orderdate}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) +ROOT(columns=[('d1', DATETIME(o_orderdate, 'Start of Year':string)), ('d2', DATETIME(o_orderdate, 'START OF MONTHS':string)), ('d3', DATETIME(o_orderdate, '-11 years':string, '+9 months':string, ' - 7 DaYs ':string, '+5 h':string, '-3 minutes':string, '+1 second':string)), ('d4', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of hour':string)), ('d5', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of minute':string)), ('d6', DATETIME(Timestamp('2025-07-14 12:58:45'):datetime, '+ 1000000 seconds':string))], orderings=[(o_orderdate):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index b1b75726f..dedd53aad 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', cr_bal), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', cg_key)], orderings=[(n_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': cg_key, 'cr_bal': cr_bal, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}, orderings=[(n_name):asc_first]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 
'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal_1, 'c_custkey': c_custkey_1, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty_1, 'ps_partkey': ps_partkey_1, 'r_name': r_name_1, 's_suppkey': s_suppkey_1}, orderings=[(n_name):asc_first]) + PROJECT(columns={'account_balance_21': account_balance_21, 'c_acctbal_1': c_acctbal, 'c_custkey_1': c_custkey, 'key_54': key_54, 'n_name': n_name, 'ps_availqty_1': ps_availqty, 'ps_partkey_1': ps_partkey, 'r_name_1': r_name, 's_suppkey_1': s_suppkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index b178db074..2604e2675 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ 
b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', a3), ('a4', a4), ('a5', a5), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) - PROJECT(columns={'a3': DEFAULT_TO(r_regionkey, 0:numeric), 'a4': IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric), 'a5': 1:numeric, 'n_name': n_name, 'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', DEFAULT_TO(r_regionkey, 0:numeric)), ('a4', IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric)), ('a5', 1:numeric), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index ef7fa9411..5bbc46de5 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ 
b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,19 +1,18 @@ -ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', event_year), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) - LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': event_year, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) - PROJECT(columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 'event_year': YEAR(ev_dt), 's_name': s_name, 't_name': t_name}) - JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - PROJECT(columns={'ev_key': ev_key, 's_name_1': s_name}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) +ROOT(columns=[('event_name', ev_name_1), ('era_name', er_name_1), ('event_year', YEAR(ev_dt)), ('season_name', s_name_1), ('tod', t_name_1)], orderings=[(ev_dt):asc_first]) + LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name_1': er_name, 'ev_dt': ev_dt, 'ev_name_1': ev_name, 's_name_1': s_name, 't_name_1': t_name}, orderings=[(ev_dt):asc_first]) + JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) + PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + 
SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + PROJECT(columns={'ev_key': ev_key, 's_name_1': s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 51069767e..a39171a35 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,26 +1,25 @@ -ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', pct_season_searches), ('pct_event_searches', pct_event_searches)], orderings=[(anything_s_name):asc_first]) - PROJECT(columns={'anything_s_name': anything_s_name, 'pct_event_searches': ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'pct_season_searches': ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)}) - JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 
'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': DEFAULT_TO(n_rows, 0:numeric) > 0:numeric, 's_name': s_name}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) - AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) - SCAN(table=EVENTS, columns={'ev_dt': 
ev_dt, 'ev_name': ev_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': name_9 == s_name, 's_name': s_name}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) +ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(anything_s_name):asc_first]) + JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) + PROJECT(columns={'is_intra_season': DEFAULT_TO(n_rows, 
0:numeric) > 0:numeric, 's_name': s_name}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) + PROJECT(columns={'is_intra_season': name_9 == s_name, 
's_name': s_name}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt index 7cd2e96fe..50a303fbb 100644 --- a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt +++ b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('tod', anything_t_name), ('pct_searches', pct_searches)], orderings=[(anything_t_start_hour):asc_first]) - PROJECT(columns={'anything_t_name': anything_t_name, 'anything_t_start_hour': anything_t_start_hour, 'pct_searches': ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)}) - AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) - 
SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - SCAN(table=SEARCHES, columns={'search_ts': search_ts}) +ROOT(columns=[('tod', anything_t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) + AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEARCHES, columns={'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_search_results_by_tod.txt b/tests/test_plan_refsols/epoch_search_results_by_tod.txt index eee911c6b..c0edae245 100644 --- a/tests/test_plan_refsols/epoch_search_results_by_tod.txt +++ b/tests/test_plan_refsols/epoch_search_results_by_tod.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('tod', anything_t_name), ('pct_searches', pct_searches), ('avg_results', avg_results)], orderings=[(anything_t_start_hour):asc_first]) - PROJECT(columns={'anything_t_name': anything_t_name, 'anything_t_start_hour': anything_t_start_hour, 'avg_results': ROUND(avg_search_num_results, 2:numeric), 'pct_searches': ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)}) - AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': 
t0.t_name, 't_start_hour': t0.t_start_hour}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) +ROOT(columns=[('tod', anything_t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)), ('avg_results', ROUND(avg_search_num_results, 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) + AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_name': ANYTHING(t_name), 'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt index 05fa11b24..133bb0087 100644 --- a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt +++ b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('engine', search_engine), ('n_users', n_users)], orderings=[(search_engine):asc_first]) - PROJECT(columns={'n_users': DEFAULT_TO(ndistinct_user_id, 0:numeric), 'search_engine': search_engine}) - JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) - AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) - SCAN(table=SEARCHES, columns={'search_engine': search_engine}) - 
AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) - FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) - SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id}) +ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) + JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) + AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) + SCAN(table=SEARCHES, columns={'search_engine': search_engine}) + AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) + FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) + SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index 8c7272291..d87d0c19e 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('low_square', low_square), ('low_sqrt', low_sqrt), ('low_cbrt', low_cbrt)], 
orderings=[(low_square):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_cbrt': low_cbrt, 'low_sqrt': low_sqrt, 'low_square': low_square}, orderings=[(low_square):asc_first]) - PROJECT(columns={'low_cbrt': POWER(sbDpLow, 0.3333333333333333:numeric), 'low_sqrt': SQRT(sbDpLow), 'low_square': sbDpLow ** 2:numeric}) +ROOT(columns=[('low_square', low_square_1), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square_1):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square_1': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) + PROJECT(columns={'low_square': sbDpLow ** 2:numeric, 'sbDpLow': sbDpLow}) SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git a/tests/test_plan_refsols/floor_and_ceil_2.txt b/tests/test_plan_refsols/floor_and_ceil_2.txt index be5d28ade..2a4dcec3a 100644 --- a/tests/test_plan_refsols/floor_and_ceil_2.txt +++ b/tests/test_plan_refsols/floor_and_ceil_2.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', complete_parts), ('total_cost', total_cost)], orderings=[(total_cost):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'complete_parts': complete_parts, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': total_cost}, orderings=[(total_cost):desc_last]) - PROJECT(columns={'complete_parts': FLOOR(ps_availqty), 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) +ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost_1)], orderings=[(total_cost_1):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost_1': total_cost}, orderings=[(total_cost):desc_last]) + 
PROJECT(columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/function_sampler.txt b/tests/test_plan_refsols/function_sampler.txt index fd7228904..b3bce5eaf 100644 --- a/tests/test_plan_refsols/function_sampler.txt +++ b/tests/test_plan_refsols/function_sampler.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('a', a), ('b', b), ('c', c), ('d', d), ('e', e), ('f', f)], orderings=[(c_address):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'a': a, 'b': b, 'c': c, 'c_address': c_address, 'd': d, 'e': e, 'f': f}, orderings=[(c_address):asc_first]) - PROJECT(columns={'a': JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown)), 'b': ROUND(c_acctbal, 1:numeric), 'c': KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string), 'c_address': c_address, 'd': PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string)), 'e': ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string)), 'f': ROUND(c_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 
100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) +ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), ('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_address):asc_first]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git 
a/tests/test_plan_refsols/hour_minute_day.txt b/tests/test_plan_refsols/hour_minute_day.txt index 0ad3b9efe..30ead1632 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('transaction_id', sbTxId), ('_expr0', _expr0), ('_expr1', _expr1), ('_expr2', _expr2)], orderings=[(sbTxId):asc_first]) - PROJECT(columns={'_expr0': HOUR(sbTxDateTime), '_expr1': MINUTE(sbTxDateTime), '_expr2': SECOND(sbTxDateTime), 'sbTxId': sbTxId}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) - FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) +ROOT(columns=[('transaction_id', sbTxId), ('_expr0', HOUR(sbTxDateTime)), ('_expr1', MINUTE(sbTxDateTime)), ('_expr2', SECOND(sbTxDateTime))], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) + FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/minutes_seconds_datediff.txt b/tests/test_plan_refsols/minutes_seconds_datediff.txt index b49585ab6..4ae64d60c 100644 --- a/tests/test_plan_refsols/minutes_seconds_datediff.txt +++ b/tests/test_plan_refsols/minutes_seconds_datediff.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('x', sbTxDateTime), 
('y', y), ('minutes_diff', minutes_diff), ('seconds_diff', seconds_diff)], orderings=[(sbTxDateTime):desc_last]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'minutes_diff': minutes_diff, 'sbTxDateTime': sbTxDateTime, 'seconds_diff': seconds_diff, 'y': y}, orderings=[(sbTxDateTime):desc_last]) - PROJECT(columns={'minutes_diff': DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), 'sbTxDateTime': sbTxDateTime, 'seconds_diff': DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), 'y': datetime.datetime(2023, 4, 3, 13, 16, 30):datetime}) - FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) +ROOT(columns=[('x', sbTxDateTime), ('y', datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), ('minutes_diff', DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime)), ('seconds_diff', DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime))], orderings=[(sbTxDateTime):desc_last]) + LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime}, orderings=[(sbTxDateTime):desc_last]) + FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index d438e6e1e..69a863b05 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,15 +1,16 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, 
(month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & 
month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + FILTER(condition=curr_year_total_spent > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'curr_year_total_spent': DEFAULT_TO(sum_month_total_spent, 0:numeric), 'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt 
b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index a57480e07..e2d259395 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name_1, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - PROJECT(columns={'n_name_1': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) + FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) + PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 86f173ea6..bca9f137e 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], 
orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans_1, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) - PROJECT(columns={'n_ticker_trans_1': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) @@ -10,8 +10,8 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': 
sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId_1, 'sbTxType': sbTxType_1}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId_1': sbTxTickerId, 'sbTxType_1': sbTxType}) + FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 5ee7193e5..05b282309 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,25 +2,26 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + FILTER(condition=n_cust_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + PROJECT(columns={'n_cust_trans': DEFAULT_TO(sum_n_cust_type_trans, 0:numeric), 'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': 
t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -40,19 +41,21 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + FILTER(condition=n_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) + PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, 
aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': 
COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index 738a980a1..caee91153 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,8 +4,8 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name_1, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) - PROJECT(columns={'c_name_1': c_name, 'c_nationkey': c_nationkey, 'o_orderkey_1': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 
'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_window_aggs.txt b/tests/test_plan_refsols/nation_window_aggs.txt index ce8dc7c0a..aebe1aef0 100644 --- a/tests/test_plan_refsols/nation_window_aggs.txt +++ b/tests/test_plan_refsols/nation_window_aggs.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('nation_name', n_name), ('key_sum', key_sum), ('key_avg', key_avg), ('n_short_comment', n_short_comment), ('n_nations', n_nations)], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - PROJECT(columns={'key_avg': RELAVG(args=[n_nationkey], partition=[], order=[]), 'key_sum': RELSUM(args=[n_nationkey], partition=[], order=[]), 'n_name': n_name, 'n_nations': RELSIZE(args=[], partition=[], order=[]), 'n_regionkey': n_regionkey, 'n_short_comment': RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[], order=[])}) - FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) +ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, 
LENGTH(n_comment) < 75:numeric)], partition=[], order=[])), ('n_nations', RELSIZE(args=[], partition=[], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) + FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/order_quarter_test.txt b/tests/test_plan_refsols/order_quarter_test.txt index 78b65eda8..8666a4eba 100644 --- a/tests/test_plan_refsols/order_quarter_test.txt +++ b/tests/test_plan_refsols/order_quarter_test.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('order_date', o_orderdate), ('quarter', quarter), ('quarter_start', quarter_start), ('next_quarter', next_quarter), ('prev_quarter', prev_quarter), ('two_quarters_ahead', two_quarters_ahead), ('two_quarters_behind', two_quarters_behind), ('quarters_since_1995', quarters_since_1995), ('quarters_until_2000', quarters_until_2000), ('same_quarter_prev_year', same_quarter_prev_year), ('same_quarter_next_year', same_quarter_next_year)], orderings=[(o_orderdate):asc_first]) - PROJECT(columns={'next_quarter': DATETIME(o_orderdate, '+1 quarter':string), 'o_orderdate': o_orderdate, 'prev_quarter': DATETIME(o_orderdate, '-1 quarter':string), 'quarter': QUARTER(o_orderdate), 'quarter_start': DATETIME(o_orderdate, 'start of quarter':string), 'quarters_since_1995': DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate), 'quarters_until_2000': DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string), 'same_quarter_next_year': DATETIME(o_orderdate, '+4 quarters':string), 'same_quarter_prev_year': DATETIME(o_orderdate, '-4 quarters':string), 'two_quarters_ahead': DATETIME(o_orderdate, '+2 quarters':string), 'two_quarters_behind': DATETIME(o_orderdate, '-2 
quarters':string)}) - LIMIT(limit=Literal(value=1, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_first]) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) +ROOT(columns=[('order_date', o_orderdate), ('quarter', QUARTER(o_orderdate)), ('quarter_start', DATETIME(o_orderdate, 'start of quarter':string)), ('next_quarter', DATETIME(o_orderdate, '+1 quarter':string)), ('prev_quarter', DATETIME(o_orderdate, '-1 quarter':string)), ('two_quarters_ahead', DATETIME(o_orderdate, '+2 quarters':string)), ('two_quarters_behind', DATETIME(o_orderdate, '-2 quarters':string)), ('quarters_since_1995', DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate)), ('quarters_until_2000', DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string)), ('same_quarter_prev_year', DATETIME(o_orderdate, '-4 quarters':string)), ('same_quarter_next_year', DATETIME(o_orderdate, '+4 quarters':string))], orderings=[(o_orderdate):asc_first]) + LIMIT(limit=Literal(value=1, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_first]) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/ordering_name_overload.txt b/tests/test_plan_refsols/ordering_name_overload.txt index 655064a78..17c17a0d6 100644 --- a/tests/test_plan_refsols/ordering_name_overload.txt +++ b/tests/test_plan_refsols/ordering_name_overload.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('ordering_0', n_comment), ('ordering_1', n_name), ('ordering_2', n_nationkey), ('ordering_3', n_comment), ('ordering_4', n_nationkey), ('ordering_5', n_name), ('ordering_6', ordering_3), ('ordering_7', ordering_4), ('ordering_8', ordering_5)], orderings=[(ordering_3):asc_last, (ordering_4):desc_last, (ordering_5):asc_first]) - 
PROJECT(columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'ordering_3': LOWER(n_name), 'ordering_4': ABS(n_nationkey), 'ordering_5': LENGTH(n_comment)}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('ordering_0', n_comment), ('ordering_1', n_name), ('ordering_2', n_nationkey), ('ordering_3', n_comment), ('ordering_4', n_nationkey), ('ordering_5', n_name), ('ordering_6', LOWER(n_name)), ('ordering_7', ABS(n_nationkey)), ('ordering_8', LENGTH(n_comment))], orderings=[(LOWER(n_name)):asc_last, (ABS(n_nationkey)):desc_last, (LENGTH(n_comment)):asc_first]) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 254b7ce5c..7ac114d5f 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', days_since_first_order)], orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey': o_orderkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey_1}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey_1': o_orderkey}) 
JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/padding_functions.txt b/tests/test_plan_refsols/padding_functions.txt index 9c078877f..f511fb5a2 100644 --- a/tests/test_plan_refsols/padding_functions.txt +++ b/tests/test_plan_refsols/padding_functions.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('original_name', sbCustName), ('ref_rpad', ref_rpad), ('ref_lpad', ref_lpad), ('right_padded', right_padded), ('left_padded', left_padded), ('truncated_right', truncated_right), ('truncated_left', truncated_left), ('zero_pad_right', zero_pad_right), ('zero_pad_left', zero_pad_left), ('right_padded_space', right_padded_space), ('left_padded_space', left_padded_space)], orderings=[(sbCustName):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'left_padded': left_padded, 'left_padded_space': left_padded_space, 'ref_lpad': ref_lpad, 'ref_rpad': ref_rpad, 'right_padded': right_padded, 'right_padded_space': right_padded_space, 'sbCustName': sbCustName, 'truncated_left': truncated_left, 'truncated_right': truncated_right, 'zero_pad_left': zero_pad_left, 'zero_pad_right': zero_pad_right}, orderings=[(sbCustName):asc_first]) - PROJECT(columns={'left_padded': LPAD(sbCustName, 30:numeric, '#':string), 'left_padded_space': LPAD(sbCustName, 30:numeric, ' ':string), 'ref_lpad': LPAD('Cust0001':string, 30:numeric, '*':string), 'ref_rpad': RPAD('Cust0001':string, 30:numeric, '*':string), 'right_padded': RPAD(sbCustName, 30:numeric, '*':string), 
'right_padded_space': RPAD(sbCustName, 30:numeric, ' ':string), 'sbCustName': sbCustName, 'truncated_left': LPAD(sbCustName, 8:numeric, '-':string), 'truncated_right': RPAD(sbCustName, 8:numeric, '-':string), 'zero_pad_left': LPAD(sbCustName, 0:numeric, '.':string), 'zero_pad_right': RPAD(sbCustName, 0:numeric, '.':string)}) - SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) +ROOT(columns=[('original_name', sbCustName), ('ref_rpad', RPAD('Cust0001':string, 30:numeric, '*':string)), ('ref_lpad', LPAD('Cust0001':string, 30:numeric, '*':string)), ('right_padded', RPAD(sbCustName, 30:numeric, '*':string)), ('left_padded', LPAD(sbCustName, 30:numeric, '#':string)), ('truncated_right', RPAD(sbCustName, 8:numeric, '-':string)), ('truncated_left', LPAD(sbCustName, 8:numeric, '-':string)), ('zero_pad_right', RPAD(sbCustName, 0:numeric, '.':string)), ('zero_pad_left', LPAD(sbCustName, 0:numeric, '.':string)), ('right_padded_space', RPAD(sbCustName, 30:numeric, ' ':string)), ('left_padded_space', LPAD(sbCustName, 30:numeric, ' ':string))], orderings=[(sbCustName):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbCustName': sbCustName}, orderings=[(sbCustName):asc_first]) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 3b22ff6b8..93d6f9723 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('reduced_size', reduced_size), ('retail_price_int', retail_price_int), ('message', message), ('discount', l_discount), ('date_dmy', date_dmy), ('date_md', date_md), ('am_pm', am_pm)], orderings=[(l_discount):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'am_pm': am_pm, 'date_dmy': date_dmy, 'date_md': date_md, 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': 
retail_price_int}, orderings=[(l_discount):desc_last]) - PROJECT(columns={'am_pm': STRING(l_receiptdate, '%H:%M%p':string), 'date_dmy': STRING(l_receiptdate, '%d-%m-%Y':string), 'date_md': STRING(l_receiptdate, '%m/%d':string), 'l_discount': l_discount, 'message': message, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'message': t0.message, 'reduced_size': t0.reduced_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'message': message, 'p_partkey': p_partkey, 'reduced_size': reduced_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'message': JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size)), 'p_partkey': p_partkey, 'reduced_size': FLOAT(p_size / 2.5:numeric), 'retail_price_int': INTEGER(p_retailprice)}) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int_1), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(l_discount):desc_last]) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int_1}) + LIMIT(limit=Literal(value=2, 
type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 391c0d4fb..4573cc866 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('name', p_name), ('qty_95', qty_95), ('qty_96', qty_96)], orderings=[(ordering_2):desc_last, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'ordering_2': ordering_2, 'p_name': p_name, 'qty_95': qty_95, 'qty_96': qty_96}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) - PROJECT(columns={'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'qty_95': DEFAULT_TO(sum_l_quantity, 0:numeric), 'qty_96': DEFAULT_TO(agg_1, 0:numeric)}) +ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2_1):desc_last, (p_name):asc_first]) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2_1': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) + PROJECT(columns={'agg_1': agg_1, 'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 
'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/prev_next_regions.txt b/tests/test_plan_refsols/prev_next_regions.txt index 7b0786314..2c8e9b151 100644 --- a/tests/test_plan_refsols/prev_next_regions.txt +++ b/tests/test_plan_refsols/prev_next_regions.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('two_preceding', two_preceding), ('one_preceding', one_preceding), ('current', r_name), ('one_following', one_following), ('two_following', two_following)], orderings=[(r_name):asc_first]) - PROJECT(columns={'one_following': NEXT(args=[r_name], partition=[], order=[(r_name):asc_last]), 'one_preceding': PREV(args=[r_name], partition=[], order=[(r_name):asc_last]), 'r_name': r_name, 'two_following': PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=-2), 'two_preceding': PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=2)}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('two_preceding', PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=2)), ('one_preceding', PREV(args=[r_name], partition=[], order=[(r_name):asc_last])), ('current', r_name), ('one_following', NEXT(args=[r_name], partition=[], order=[(r_name):asc_last])), ('two_following', PREV(args=[r_name], partition=[], order=[(r_name):asc_last], n=-2))], orderings=[(r_name):asc_first]) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 
720bf6ef4..085f35880 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('rank', rank)], orderings=[(rank):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'rank': rank}, orderings=[(rank):asc_first]) - PROJECT(columns={'n_name': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name_1, 'rank': rank}, orderings=[(rank):asc_first]) + PROJECT(columns={'n_name_1': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 9372d9bb7..c21a1b136 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', rank)], orderings=[(p_partkey):asc_first]) - LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': rank}, orderings=[(p_partkey):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) + 
LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name_1, 'rank': rank}, orderings=[(p_partkey):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'r_name_1': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) diff --git a/tests/test_plan_refsols/region_nation_window_aggs.txt b/tests/test_plan_refsols/region_nation_window_aggs.txt index 60a4990f4..a748f74d8 100644 --- a/tests/test_plan_refsols/region_nation_window_aggs.txt +++ b/tests/test_plan_refsols/region_nation_window_aggs.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('nation_name', n_name), ('key_sum', key_sum), ('key_avg', key_avg), ('n_short_comment', n_short_comment), ('n_nations', n_nations)], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - PROJECT(columns={'key_avg': RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[]), 'key_sum': RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[]), 'n_name': n_name, 'n_nations': RELSIZE(args=[], partition=[n_regionkey], order=[]), 'n_regionkey': n_regionkey, 'n_short_comment': RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': 
t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) +ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])), ('n_nations', RELSIZE(args=[], partition=[n_regionkey], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index f9cbc020a..d8ec49ff3 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('n_orders', 
n_orders)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, 
columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/sign.txt b/tests/test_plan_refsols/sign.txt index 92ba41a82..7c2d6ac5c 100644 --- a/tests/test_plan_refsols/sign.txt +++ b/tests/test_plan_refsols/sign.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('high', sbDpHigh), ('high_neg', high_neg), ('high_zero', high_zero), ('sign_high', sign_high), ('sign_high_neg', sign_high_neg), ('sign_high_zero', sign_high_zero)], orderings=[(sbDpHigh):asc_first]) - PROJECT(columns={'high_neg': high_neg, 'high_zero': high_zero, 'sbDpHigh': sbDpHigh, 'sign_high': SIGN(sbDpHigh), 'sign_high_neg': SIGN(high_neg), 'sign_high_zero': SIGN(high_zero)}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'high_neg': high_neg, 'high_zero': high_zero, 'sbDpHigh': sbDpHigh}, orderings=[(sbDpHigh):asc_first]) - PROJECT(columns={'high_neg': -1:numeric * sbDpHigh, 'high_zero': 0:numeric * sbDpHigh, 'sbDpHigh': sbDpHigh}) - SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) +ROOT(columns=[('high', sbDpHigh), ('high_neg', -1:numeric * sbDpHigh), ('high_zero', 0:numeric * sbDpHigh), ('sign_high', SIGN(sbDpHigh)), ('sign_high_neg', SIGN(-1:numeric * sbDpHigh)), ('sign_high_zero', SIGN(0:numeric * sbDpHigh))], orderings=[(sbDpHigh):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbDpHigh': sbDpHigh}, orderings=[(sbDpHigh):asc_first]) + SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) diff --git 
a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index 7e068244a..7eb23ef93 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('region_name', r_name), ('n_other_nations', n_other_nations)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_other_nations': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) - FILTER(condition=name_3 != r_name, columns={'key_2': key_2, 'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_2': t1.r_regionkey, 'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=SLICE(t1.n_name, 
None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) + FILTER(condition=name_3 != r_name, columns={'key_2': key_2, 'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_2': t1.r_regionkey, 'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index c4d48ea2e..b2df7c11c 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('region_name', r_name), ('n_other_regions', n_other_regions)], orderings=[(r_name):asc_first]) - PROJECT(columns={'n_other_regions': DEFAULT_TO(n_rows, 0:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=name_3 != r_name & SLICE(name_3, None:unknown, 1:numeric, None:unknown) == SLICE(r_name, None:unknown, 1:numeric, None:unknown), columns={'r_regionkey': r_regionkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('region_name', 
r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=name_3 != r_name & SLICE(name_3, None:unknown, 1:numeric, None:unknown) == SLICE(r_name, None:unknown, 1:numeric, None:unknown), columns={'r_regionkey': r_regionkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'name_3': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index 889ab189c..d32e6626b 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,8 +1,9 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) + PROJECT(columns={'c_name_1': c_name, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index bb1b832a9..4da08ecfe 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,15 +1,16 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) - FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': 
s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name_1, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) + PROJECT(columns={'n_orders': n_orders, 'p_name_1': p_name, 's_name': s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 734ac6e6f..f63e075d7 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -5,8 +5,8 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], 
partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows_1, 'p_name': p_name_1, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'p_name_1': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey_1, 'sum_l_quantity': t0.sum_l_quantity}) PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey_1': ps_partkey, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index d7031fdb8..a37b6f7bc 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', supplier_quantity), ('national_qty_pct', national_qty_pct)], orderings=[(national_qty_pct):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'national_qty_pct': national_qty_pct, 's_name': s_name, 'supplier_quantity': supplier_quantity}, orderings=[(national_qty_pct):desc_last]) - PROJECT(columns={'n_name': n_name, 
'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'supplier_quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) +ROOT(columns=[('supplier_name', s_name_1), ('nation_name', n_name_1), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct_1)], orderings=[(national_qty_pct_1):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name_1': n_name, 'national_qty_pct_1': national_qty_pct, 's_name_1': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) + PROJECT(columns={'n_name': n_name, 'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index e95879b55..916816a00 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt +++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', acctbal_delta)], orderings=[(acctbal_delta):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': 
r_name, 's_name': s_name}, orderings=[(acctbal_delta):desc_last]) - PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name': r_name, 's_name': s_name}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name_1, 's_name': s_name_1}, orderings=[(acctbal_delta):desc_last]) + PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name_1': r_name, 's_name_1': s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index a380dffc1..0d29adc03 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', purchase_country), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'purchase_country': purchase_country}, orderings=[(ir):desc_last]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'purchase_country': name_2}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name_1, 'ir': ir, 'purchase_country': name_2}, orderings=[(ir):desc_last]) + 
PROJECT(columns={'co_name_1': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'name_2': name_2}) JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index ad8de3341..11675bca2 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,23 +1,22 @@ -ROOT(columns=[('country_name', co_name), ('made_ir', made_ir), ('sold_ir', sold_ir), ('user_ir', user_ir)], orderings=[(co_name):asc_first]) - PROJECT(columns={'co_name': co_name, 'made_ir': ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric), 'sold_ir': ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric), 'user_ir': ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric)}) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 
'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) +ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) - AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) - JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) - SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) + AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': 
SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) + SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt index 6c3682229..b040a95ee 100644 --- a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt +++ b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('error', er_name), ('pct', pct)], orderings=[(pct):desc_last]) - PROJECT(columns={'er_name': er_name, 'pct': ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)}) - JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) - SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) - AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_error_id': t0.in_error_id}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - 
FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) +ROOT(columns=[('error', er_name), ('pct', ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[(ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)):desc_last]) + JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) + SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) + AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_error_id': t0.in_error_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 3ad6cf25b..89fe7388c 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('country', co_name), ('ir', ir)], orderings=[(co_name):asc_first]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)}) - JOIN(condition=t0.co_id == 
t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) + 
JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index ae75e6956..baa9bd355 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,17 +1,16 @@ -ROOT(columns=[('year', release_year), ('ir', ir)], orderings=[(release_year):asc_first]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric), 'release_year': release_year}) - JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.DEVICES, columns={'de_product_id': 
de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - PROJECT(columns={'de_id_1': de_id, 'pr_release': pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) + JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) + AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) + PROJECT(columns={'release_year': YEAR(pr_release)}) + JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + PROJECT(columns={'de_id_1': de_id, 'pr_release': 
pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index e44097202..5914783da 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('brand', pr_brand), ('ir', ir)], orderings=[(pr_brand):asc_first]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand}) - AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_brand': pr_brand}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) + AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + 
PROJECT(columns={'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_brand': pr_brand}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 295a255e1..e46df112f 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,30 +1,30 @@ -ROOT(columns=[('month', month_0), ('ir', ir)], orderings=[(month):asc_first]) - PROJECT(columns={'ir': ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric), 'month': month, 'month_0': JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_3': expr_3, 'month': MONTH(ca_dt), 'n_rows': n_rows, 'year': year}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows, 'year_1': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), 
columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) +ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows_1, 'month': t0.month_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) + PROJECT(columns={'ca_dt_1': ca_dt, 'month_1': month, 'n_rows_1': n_rows, 'year_1': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + 
FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, 
columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index df4147ff6..e2c60b089 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,6 +1,6 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}, orderings=[(ir):desc_last]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 
0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand_1, 'pr_name': pr_name_1, 'pr_type': pr_type_1}, orderings=[(ir):desc_last]) + PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand_1': pr_brand, 'pr_name_1': pr_name, 'pr_type_1': pr_type}) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 7a7c31f77..3734ddf0e 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -1,30 +1,28 @@ -ROOT(columns=[('years_since_release', years_since_release), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought_change), ('pct_incident_change', pct_incident_change), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(years_since_release):asc_first]) - PROJECT(columns={'cum_ir': ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric), 'n_devices': n_devices, 'n_incidents': n_incidents, 'pct_bought_change': ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], 
order=[(year):asc_last]), 2:numeric), 'pct_incident_change': ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric), 'years_since_release': year - YEAR(release_date)}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_incidents': DEFAULT_TO(sum_expr_4, 0:numeric), 'release_date': release_date, 'year': year}) - FILTER(condition=YEAR(release_date) <= year, columns={'release_date': release_date, 'sum_expr_4': sum_expr_4, 'sum_n_rows': sum_n_rows, 'year': year}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year}) - AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) - SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == 
DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) +ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_4, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_n_rows, 0:numeric)), ('incidents', DEFAULT_TO(sum_expr_4, 0:numeric))], orderings=[(year - YEAR(release_date)):asc_first]) + FILTER(condition=YEAR(release_date) <= year, columns={'release_date': release_date, 'sum_expr_4': sum_expr_4, 'sum_n_rows': sum_n_rows, 'year': year}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year}) + AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) + 
FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) + SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': 
pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index f3186c8d6..c00cf0803 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,19 +1,17 @@ -ROOT(columns=[('yr', year), ('cum_ir', cum_ir), ('pct_bought_change', pct_bought_change), ('pct_incident_change', pct_incident_change), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) - PROJECT(columns={'cum_ir': ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric), 'n_devices': n_devices, 'n_incidents': n_incidents, 'pct_bought_change': ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric), 'pct_incident_change': ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric), 'year': year}) - 
PROJECT(columns={'n_devices': n_devices, 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) +ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) + FILTER(condition=n_devices 
> 0:numeric, columns={'n_devices': n_devices, 'n_incidents': n_incidents, 'year': year}) + PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) + PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index e83bd7ade..8b94ac8f2 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ 
b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,7 +1,7 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) FILTER(condition=RANKING(args=[], partition=[txn_day], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) - FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day_1}) - PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day_1': txn_day}) + FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) + PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index 01860bd68..404152d0e 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('name', n_name), ('total_bal', total_bal)], orderings=[(ordering_0):asc_last]) - PROJECT(columns={'n_name': n_name, 'ordering_0': ordering_0, 'total_bal': DEFAULT_TO(sum_s_acctbal, 0:numeric)}) - 
LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'ordering_0': ordering_0, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) +ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'ordering_0': ordering_0, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(ordering_0):asc_last]) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/topk_order_by_calc.txt b/tests/test_plan_refsols/topk_order_by_calc.txt index 3c0f38516..58dcdcb9f 100644 --- a/tests/test_plan_refsols/topk_order_by_calc.txt +++ b/tests/test_plan_refsols/topk_order_by_calc.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('region_name', r_name), ('name_length', name_length)], orderings=[(r_name):asc_last]) - PROJECT(columns={'name_length': LENGTH(r_name), 'r_name': r_name}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_name': r_name}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, 
columns={'r_name': r_name}) +ROOT(columns=[('region_name', r_name), ('name_length', LENGTH(r_name))], orderings=[(r_name):asc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_name': r_name}, orderings=[(r_name):asc_last]) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/tpch_q1.txt b/tests/test_plan_refsols/tpch_q1.txt index a33019900..14e38c38f 100644 --- a/tests/test_plan_refsols/tpch_q1.txt +++ b/tests/test_plan_refsols/tpch_q1.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('L_RETURNFLAG', l_returnflag), ('L_LINESTATUS', l_linestatus), ('SUM_QTY', SUM_QTY), ('SUM_BASE_PRICE', SUM_BASE_PRICE), ('SUM_DISC_PRICE', SUM_DISC_PRICE), ('SUM_CHARGE', SUM_CHARGE), ('AVG_QTY', avg_l_quantity), ('AVG_PRICE', avg_l_extendedprice), ('AVG_DISC', avg_l_discount), ('COUNT_ORDER', n_rows)], orderings=[(l_returnflag):asc_first, (l_linestatus):asc_first]) - PROJECT(columns={'SUM_BASE_PRICE': DEFAULT_TO(sum_l_extendedprice, 0:numeric), 'SUM_CHARGE': DEFAULT_TO(sum_expr_8, 0:numeric), 'SUM_DISC_PRICE': DEFAULT_TO(sum_expr_9, 0:numeric), 'SUM_QTY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'avg_l_discount': avg_l_discount, 'avg_l_extendedprice': avg_l_extendedprice, 'avg_l_quantity': avg_l_quantity, 'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag, 'n_rows': n_rows}) - AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(expr_8), 'sum_expr_9': SUM(expr_9), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) - PROJECT(columns={'expr_8': l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax, 'expr_9': l_extendedprice * 1:numeric - l_discount, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag}) - 
FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) +ROOT(columns=[('L_RETURNFLAG', l_returnflag), ('L_LINESTATUS', l_linestatus), ('SUM_QTY', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('SUM_BASE_PRICE', DEFAULT_TO(sum_l_extendedprice, 0:numeric)), ('SUM_DISC_PRICE', DEFAULT_TO(sum_expr_9, 0:numeric)), ('SUM_CHARGE', DEFAULT_TO(sum_expr_8, 0:numeric)), ('AVG_QTY', avg_l_quantity), ('AVG_PRICE', avg_l_extendedprice), ('AVG_DISC', avg_l_discount), ('COUNT_ORDER', n_rows)], orderings=[(l_returnflag):asc_first, (l_linestatus):asc_first]) + AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(expr_8), 'sum_expr_9': SUM(expr_9), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) + PROJECT(columns={'expr_8': l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax, 'expr_9': l_extendedprice * 1:numeric - l_discount, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag}) + FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': 
l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index a77131552..4ab824798 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) + LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal_1, 'c_address': c_address_1, 'c_comment': c_comment_1, 'c_custkey': c_custkey, 'c_name': c_name_1, 'c_phone': c_phone_1, 'n_name': n_name_1}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey': c_custkey, 'c_name_1': c_name, 'c_phone_1': c_phone, 'n_name_1': n_name}) JOIN(condition=t0.c_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal_1, 'c_address': t0.c_address_1, 'c_comment': t0.c_comment_1, 'c_custkey': t0.c_custkey_1, 'c_name': t0.c_name_1, 'c_phone': t0.c_phone_1, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) PROJECT(columns={'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 
'c_comment_1': c_comment, 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'c_nationkey_1': c_nationkey, 'c_phone_1': c_phone, 'sum_expr_1': sum_expr_1}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index de30994c3..207c69bc1 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,7 +1,7 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) LIMIT(limit=Literal(value=10, type=NumericType()), columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey_1}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey_1': ps_partkey, 'sum_metric': sum_metric}) + FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) PROJECT(columns={'metric': ps_supplycost * ps_availqty}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index 6a11fe1ab..7192a4e23 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', 
HIGH_LINE_COUNT), ('LOW_LINE_COUNT', LOW_LINE_COUNT)], orderings=[(l_shipmode):asc_first]) - PROJECT(columns={'HIGH_LINE_COUNT': DEFAULT_TO(sum_is_high_priority, 0:numeric), 'LOW_LINE_COUNT': DEFAULT_TO(sum_expr_2, 0:numeric), 'l_shipmode': l_shipmode}) - AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) - PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) - FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', DEFAULT_TO(sum_is_high_priority, 0:numeric)), ('LOW_LINE_COUNT', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(l_shipmode):asc_first]) + AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) + PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + 
FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index 877cbcd22..ffb308cdf 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,18 +1,16 @@ -ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', TOTAL_REVENUE)], orderings=[(s_suppkey):asc_first]) - PROJECT(columns={'TOTAL_REVENUE': DEFAULT_TO(sum_expr_3, 0:numeric), 's_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=DEFAULT_TO(t1.sum_expr_3_1, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) - PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - 
PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) - PROJECT(columns={'l_suppkey': l_suppkey, 'sum_expr_3': sum_expr_3, 'sum_expr_3_1': sum_expr_3}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) - PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) + 
PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) + PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) + PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index d1bcc2be8..8de1daa53 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,7 +1,7 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) LIMIT(limit=Literal(value=10, 
type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey_1, 'c_name': c_name_1, 'o_orderdate': o_orderdate_1, 'o_orderkey': o_orderkey_1, 'o_totalprice': o_totalprice_1}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'o_orderdate_1': o_orderdate, 'o_orderkey_1': o_orderkey, 'o_totalprice_1': o_totalprice}) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index cce78a05f..3baa9fc80 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -7,13 +7,14 @@ 
ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': 
ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 06077eb2c..ff832eb90 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,15 +1,16 @@ -ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', TOTACCTBAL)], orderings=[(cntry_code):asc_first]) - PROJECT(columns={'TOTACCTBAL': DEFAULT_TO(sum_c_acctbal, 0:numeric), 'cntry_code': cntry_code, 'n_rows': n_rows}) - AGGREGATE(keys={'cntry_code': cntry_code}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'cntry_code': cntry_code}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'cntry_code': t0.cntry_code, 'n_rows': t1.n_rows}) - FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': cntry_code}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - 
FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) - AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) + AGGREGATE(keys={'cntry_code': cntry_code}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'cntry_code': cntry_code}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'cntry_code': t0.cntry_code, 'n_rows': t1.n_rows}) + FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': cntry_code}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) + FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + JOIN(condition=True:bool, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) + AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) + FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + PROJECT(columns={'c_acctbal': c_acctbal, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index d7cf1c363..99e4bd091 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,20 +1,19 @@ -ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', REVENUE)], orderings=[(REVENUE):desc_last]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_value, 0:numeric), 'anything_n_name': anything_n_name}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.o_orderkey 
== t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 
0:numeric)):desc_last]) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) + PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) + JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) + PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index a2698fc77..e721ef7c6 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,17 +1,16 @@ -ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), ('REVENUE', REVENUE)], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_volume, 0:numeric), 'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}) - AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}, aggregations={'sum_volume': SUM(volume)}) - PROJECT(columns={'cust_nation': name_8, 'l_year': YEAR(l_shipdate), 'n_name': n_name, 'volume': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 
'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) +ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) + AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}, aggregations={'sum_volume': SUM(volume)}) + PROJECT(columns={'cust_nation': name_8, 'l_year': YEAR(l_shipdate), 'n_name': n_name, 'volume': l_extendedprice * 1:numeric - l_discount}) + FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': 
l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index 23af1d803..8abc81dc2 100644 --- 
a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,17 +1,16 @@ -ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', AMOUNT)], orderings=[(n_name):asc_first, (o_year):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'AMOUNT': AMOUNT, 'n_name': n_name, 'o_year': o_year}, orderings=[(n_name):asc_first, (o_year):desc_last]) - PROJECT(columns={'AMOUNT': DEFAULT_TO(sum_value, 0:numeric), 'n_name': n_name, 'o_year': o_year}) - AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 
'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'o_year': o_year, 'sum_value': sum_value}, orderings=[(n_name):asc_first, (o_year):desc_last]) + AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) + PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 
'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index c436e164f..96e986806 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice_1 < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - 
PROJECT(columns={'o_totalprice_1': o_totalprice}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[NULL_1], partition=[], order=[]), columns={}) + PROJECT(columns={'NULL_1': None:unknown, 'o_totalprice': o_totalprice}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index a367c4443..c85511eca 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, 
(sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index 80af8f609..ac1149b97 100644 --- 
a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': 
RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/years_months_days_hours_datediff.txt b/tests/test_plan_refsols/years_months_days_hours_datediff.txt index afee87604..f7f1686d0 100644 --- a/tests/test_plan_refsols/years_months_days_hours_datediff.txt +++ b/tests/test_plan_refsols/years_months_days_hours_datediff.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('x', sbTxDateTime), ('y1', y1), ('years_diff', years_diff), ('c_years_diff', c_years_diff), ('c_y_diff', c_y_diff), ('y_diff', y_diff), ('months_diff', months_diff), ('c_months_diff', c_months_diff), ('mm_diff', mm_diff), ('days_diff', days_diff), ('c_days_diff', c_days_diff), ('c_d_diff', c_d_diff), ('d_diff', d_diff), ('hours_diff', hours_diff), ('c_hours_diff', c_hours_diff), ('c_h_diff', c_h_diff)], 
orderings=[(years_diff):asc_first]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'c_d_diff': c_d_diff, 'c_days_diff': c_days_diff, 'c_h_diff': c_h_diff, 'c_hours_diff': c_hours_diff, 'c_months_diff': c_months_diff, 'c_y_diff': c_y_diff, 'c_years_diff': c_years_diff, 'd_diff': d_diff, 'days_diff': days_diff, 'hours_diff': hours_diff, 'mm_diff': mm_diff, 'months_diff': months_diff, 'sbTxDateTime': sbTxDateTime, 'y1': y1, 'y_diff': y_diff, 'years_diff': years_diff}, orderings=[(years_diff):asc_first]) - PROJECT(columns={'c_d_diff': DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_days_diff': DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_h_diff': DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_hours_diff': DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_months_diff': DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_y_diff': DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'c_years_diff': DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'd_diff': DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'days_diff': DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'hours_diff': DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'mm_diff': DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'months_diff': DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'sbTxDateTime': sbTxDateTime, 'y1': datetime.datetime(2025, 5, 2, 11, 0):datetime, 'y_diff': DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime), 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 
0):datetime)}) +ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff_1), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff_1):asc_first]) + LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff_1': years_diff}, orderings=[(years_diff):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)}) FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git 
a/tests/test_plan_refsols/yoy_change_in_num_orders.txt b/tests/test_plan_refsols/yoy_change_in_num_orders.txt index ea9eab88a..8a2b5f26c 100644 --- a/tests/test_plan_refsols/yoy_change_in_num_orders.txt +++ b/tests/test_plan_refsols/yoy_change_in_num_orders.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('year', year), ('current_year_orders', n_rows), ('pct_change', pct_change)], orderings=[(year):asc_first]) - PROJECT(columns={'n_rows': n_rows, 'pct_change': 100.0:numeric * n_rows - PREV(args=[n_rows], partition=[], order=[(year):asc_last]) / PREV(args=[n_rows], partition=[], order=[(year):asc_last]), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) +ROOT(columns=[('year', year), ('current_year_orders', n_rows), ('pct_change', 100.0:numeric * n_rows - PREV(args=[n_rows], partition=[], order=[(year):asc_last]) / PREV(args=[n_rows], partition=[], order=[(year):asc_last]))], orderings=[(year):asc_first]) + AGGREGATE(keys={'year': year}, aggregations={'n_rows': COUNT()}) + PROJECT(columns={'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql b/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql index a24c81c4d..a5952cee5 100644 --- a/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql +++ b/tests/test_sql_refsols/cumulative_stock_analysis_ansi.sql @@ -1,42 +1,32 @@ -WITH _t0 AS ( - SELECT - COUNT( - CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END - ) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, - ROUND( - ( - 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) - ) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS pct_apple_txns, - ROUND( - AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS rolling_avg_amount, - SUM( - CASE - WHEN sbtransaction.sbtxtype = 'buy' - THEN sbtransaction.sbtxshares - ELSE 0 - sbtransaction.sbtxshares - END - ) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, - COUNT(*) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, - sbtransaction.sbtxdatetime - FROM main.sbtransaction AS sbtransaction - JOIN main.sbticker AS sbticker - ON sbticker.sbtickerid = sbtransaction.sbtxtickerid - WHERE - EXTRACT(MONTH FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 4 - AND EXTRACT(YEAR FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 2023 - AND sbtransaction.sbtxstatus = 'success' -) SELECT - sbtxdatetime AS date_time, - txn_within_day, - n_buys_within_day, - pct_apple_txns, - share_change, - rolling_avg_amount -FROM _t0 + sbtransaction.sbtxdatetime AS date_time, + COUNT(*) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, + COUNT( + CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END + ) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, + ROUND( + ( + 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS 
LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS pct_apple_txns, + SUM( + CASE + WHEN sbtransaction.sbtxtype = 'buy' + THEN sbtransaction.sbtxshares + ELSE 0 - sbtransaction.sbtxshares + END + ) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, + ROUND( + AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS rolling_avg_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid +WHERE + EXTRACT(MONTH FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 4 + AND EXTRACT(YEAR FROM CAST(sbtransaction.sbtxdatetime AS DATETIME)) = 2023 + AND sbtransaction.sbtxstatus = 'success' ORDER BY - sbtxdatetime + sbtransaction.sbtxdatetime diff --git a/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql b/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql index 76948077b..4d63f1bc5 100644 --- a/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql +++ b/tests/test_sql_refsols/cumulative_stock_analysis_sqlite.sql @@ -1,42 +1,32 @@ -WITH _t0 AS ( - SELECT - COUNT( - CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END - ) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, - ROUND( - CAST(( - 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - ) AS REAL) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS pct_apple_txns, - ROUND( - AVG(sbtransaction.sbtxamount) OVER (ORDER BY 
sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS rolling_avg_amount, - SUM( - IIF( - sbtransaction.sbtxtype = 'buy', - sbtransaction.sbtxshares, - 0 - sbtransaction.sbtxshares - ) - ) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, - COUNT(*) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, - sbtransaction.sbtxdatetime - FROM main.sbtransaction AS sbtransaction - JOIN main.sbticker AS sbticker - ON sbticker.sbtickerid = sbtransaction.sbtxtickerid - WHERE - CAST(STRFTIME('%Y', sbtransaction.sbtxdatetime) AS INTEGER) = 2023 - AND CAST(STRFTIME('%m', sbtransaction.sbtxdatetime) AS INTEGER) = 4 - AND sbtransaction.sbtxstatus = 'success' -) SELECT - sbtxdatetime AS date_time, - txn_within_day, - n_buys_within_day, - pct_apple_txns, - share_change, - rolling_avg_amount -FROM _t0 + sbtransaction.sbtxdatetime AS date_time, + COUNT(*) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, + COUNT( + CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END + ) OVER (PARTITION BY DATE(sbtransaction.sbtxdatetime, 'start of day') ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, + ROUND( + CAST(( + 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) AS REAL) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS pct_apple_txns, + SUM( + IIF( + sbtransaction.sbtxtype = 'buy', + sbtransaction.sbtxshares, + 0 - sbtransaction.sbtxshares + ) + ) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, + ROUND( + AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS rolling_avg_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid +WHERE + CAST(STRFTIME('%Y', sbtransaction.sbtxdatetime) AS INTEGER) = 2023 + AND CAST(STRFTIME('%m', sbtransaction.sbtxdatetime) AS INTEGER) = 4 + AND sbtransaction.sbtxstatus = 'success' ORDER BY - sbtxdatetime + sbtransaction.sbtxdatetime diff --git a/tests/test_sql_refsols/datediff_ansi.sql b/tests/test_sql_refsols/datediff_ansi.sql index 82f32d329..aa7347c30 100644 --- a/tests/test_sql_refsols/datediff_ansi.sql +++ b/tests/test_sql_refsols/datediff_ansi.sql @@ -1,16 +1,24 @@ +WITH _t0 AS ( + SELECT + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff_1, + sbtxdatetime + FROM main.sbtransaction + WHERE + EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 + ORDER BY + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) + LIMIT 30 +) SELECT sbtxdatetime AS x, CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff, + years_diff_1 AS years_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MONTH) AS months_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), DAY) AS days_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), HOUR) AS hours_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MINUTE) AS minutes_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), SECOND) AS seconds_diff -FROM main.sbtransaction 
-WHERE - EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 +FROM _t0 ORDER BY - years_diff -LIMIT 30 + years_diff_1 diff --git a/tests/test_sql_refsols/datediff_sqlite.sql b/tests/test_sql_refsols/datediff_sqlite.sql index bc6d68474..734f72ca9 100644 --- a/tests/test_sql_refsols/datediff_sqlite.sql +++ b/tests/test_sql_refsols/datediff_sqlite.sql @@ -1,8 +1,19 @@ +WITH _t0 AS ( + SELECT + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff_1, + sbtxdatetime + FROM main.sbtransaction + WHERE + CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 + ORDER BY + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) + LIMIT 30 +) SELECT sbtxdatetime AS x, '2025-05-02 11:00:00' AS y1, '2023-04-03 13:16:30' AS y, - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff, + years_diff_1 AS years_diff, ( CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) ) * 12 + CAST(STRFTIME('%m', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS months_diff, @@ -24,9 +35,6 @@ SELECT ) AS INTEGER) * 24 + CAST(STRFTIME('%H', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%H', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%M', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%M', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%S', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%S', sbtxdatetime) AS INTEGER) AS seconds_diff -FROM main.sbtransaction -WHERE - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 +FROM _t0 ORDER BY - years_diff -LIMIT 30 + years_diff_1 diff --git a/tests/test_sql_refsols/defog_broker_adv3_ansi.sql b/tests/test_sql_refsols/defog_broker_adv3_ansi.sql index ca84729c3..46087024c 100644 --- a/tests/test_sql_refsols/defog_broker_adv3_ansi.sql +++ 
b/tests/test_sql_refsols/defog_broker_adv3_ansi.sql @@ -18,4 +18,6 @@ LEFT JOIN _s1 AS _s1 WHERE NOT _s1.n_rows IS NULL AND _s1.n_rows >= 5 ORDER BY - success_rate + ( + 100.0 * COALESCE(_s1.sum_expr_2, 0) + ) / COALESCE(_s1.n_rows, 0) diff --git a/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql index 1b5796b97..c30fd13b9 100644 --- a/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv3_sqlite.sql @@ -18,4 +18,6 @@ LEFT JOIN _s1 AS _s1 WHERE NOT _s1.n_rows IS NULL AND _s1.n_rows >= 5 ORDER BY - success_rate + CAST(( + 100.0 * COALESCE(_s1.sum_expr_2, 0) + ) AS REAL) / COALESCE(_s1.n_rows, 0) diff --git a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index 479db90cc..ab77ecbff 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -32,9 +32,10 @@ WITH _s0 AS ( sbdptickerid ), _t0 AS ( SELECT - SUM(_s0.sum_sbdpclose) / SUM(_s0.count_sbdpclose) AS avg_close, MAX(_s0.max_high) AS max_high, MIN(_s0.min_low) AS min_low, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, _s0.month, sbticker.sbtickersymbol FROM _s0 AS _s0 @@ -47,10 +48,12 @@ WITH _s0 AS ( SELECT sbtickersymbol AS symbol, month, - avg_close, + sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, max_high, min_low, ( - avg_close - LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) - ) / LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) AS momc + ( + sum_sum_sbdpclose / sum_count_sbdpclose + ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) AS momc FROM _t0 diff --git 
a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index f61260e72..e74219954 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -32,9 +32,10 @@ WITH _s0 AS ( sbdptickerid ), _t0 AS ( SELECT - CAST(SUM(_s0.sum_sbdpclose) AS REAL) / SUM(_s0.count_sbdpclose) AS avg_close, MAX(_s0.max_high) AS max_high, MIN(_s0.min_low) AS min_low, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, _s0.month, sbticker.sbtickersymbol FROM _s0 AS _s0 @@ -47,10 +48,12 @@ WITH _s0 AS ( SELECT sbtickersymbol AS symbol, month, - avg_close, + CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose AS avg_close, max_high, min_low, CAST(( - avg_close - LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) - ) AS REAL) / LAG(avg_close, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc + ( + CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose + ) - LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS REAL) / LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql index 26f6f11a1..bb8ea4609 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid +), _t0 AS ( + SELECT + sbticker.sbtickersymbol AS sbtickersymbol_1, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, + _s1.n_rows + FROM main.sbticker AS sbticker + LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid + ORDER BY + COALESCE(_s1.sum_sbtxamount, 0) DESC + LIMIT 10 ) SELECT - sbticker.sbtickersymbol AS symbol, - 
COALESCE(_s1.n_rows, 0) AS num_transactions, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount -FROM main.sbticker AS sbticker -LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid + sbtickersymbol_1 AS symbol, + COALESCE(n_rows, 0) AS num_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 10 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql index 26f6f11a1..bb8ea4609 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid +), _t0 AS ( + SELECT + sbticker.sbtickersymbol AS sbtickersymbol_1, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, + _s1.n_rows + FROM main.sbticker AS sbticker + LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid + ORDER BY + COALESCE(_s1.sum_sbtxamount, 0) DESC + LIMIT 10 ) SELECT - sbticker.sbtickersymbol AS symbol, - COALESCE(_s1.n_rows, 0) AS num_transactions, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount -FROM main.sbticker AS sbticker -LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid + sbtickersymbol_1 AS symbol, + COALESCE(n_rows, 0) AS num_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 10 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql index 098911866..182369122 100644 --- a/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv5_ansi.sql @@ -6,23 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY salesperson_id -), _t0 AS ( - SELECT - RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC NULLS FIRST) AS sales_rank, - COALESCE(_s1.sum_sale_price, 0) AS total_sales, - salespersons.first_name, - 
salespersons.last_name, - _s1.n_rows - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id ) SELECT - first_name, - last_name, - total_sales, - n_rows AS num_sales, - sales_rank -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total_sales, + _s1.n_rows AS num_sales, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC NULLS FIRST) AS sales_rank +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_sales DESC + COALESCE(_s1.sum_sale_price, 0) DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql index 85d65af26..703043987 100644 --- a/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv5_sqlite.sql @@ -6,23 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY salesperson_id -), _t0 AS ( - SELECT - RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC) AS sales_rank, - COALESCE(_s1.sum_sale_price, 0) AS total_sales, - salespersons.first_name, - salespersons.last_name, - _s1.n_rows - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id ) SELECT - first_name, - last_name, - total_sales, - n_rows AS num_sales, - sales_rank -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total_sales, + _s1.n_rows AS num_sales, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC) AS sales_rank +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_sales DESC + COALESCE(_s1.sum_sale_price, 0) DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql index b8a903eb5..7a80c3684 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql +++ 
b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql @@ -8,15 +8,24 @@ WITH _s1 AS ( sale_date >= DATE_ADD(CURRENT_TIMESTAMP(), -3, 'MONTH') GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.salespersons AS salespersons + LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 3 ) SELECT - salespersons.first_name, - salespersons.last_name, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + last_name_1 AS last_name, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 3 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql index c28f59e7e..103007c2c 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql @@ -8,15 +8,24 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-3 month') GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.salespersons AS salespersons + LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 3 ) SELECT - salespersons.first_name, - salespersons.last_name, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + 
last_name_1 AS last_name, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 3 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 9a9471dea..16ac04eee 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -8,15 +8,24 @@ WITH _s1 AS ( DATEDIFF(CURRENT_TIMESTAMP(), CAST(sale_date AS DATETIME), DAY) <= 30 GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + _s1.n_rows_1 AS n_rows, + _s1.sum_sale_price + FROM main.salespersons AS salespersons + JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + n_rows DESC + LIMIT 5 ) SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows_1 AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + last_name_1 AS last_name, + n_rows AS total_sales, + COALESCE(sum_sale_price, 0) AS total_revenue +FROM _t0 ORDER BY - _s1.n_rows_1 DESC -LIMIT 5 + n_rows DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 9f797c2bc..b06f6bb94 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -10,15 +10,24 @@ WITH _s1 AS ( ) AS INTEGER) <= 30 GROUP BY salesperson_id +), _t0 AS ( + SELECT + salespersons.first_name AS first_name_1, + salespersons.last_name AS last_name_1, + _s1.n_rows_1 AS n_rows, + _s1.sum_sale_price + FROM main.salespersons AS salespersons + JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id + ORDER BY + n_rows DESC + LIMIT 5 ) SELECT - 
salespersons.first_name, - salespersons.last_name, - _s1.n_rows_1 AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.salespersons AS salespersons -JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id + first_name_1 AS first_name, + last_name_1 AS last_name, + n_rows AS total_sales, + COALESCE(sum_sale_price, 0) AS total_revenue +FROM _t0 ORDER BY - _s1.n_rows_1 DESC -LIMIT 5 + n_rows DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql index 3ea00bc25..3ef87cc68 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql @@ -6,15 +6,24 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id +), _t0 AS ( + SELECT + cars.make AS make_1, + cars.model AS model_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id + ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 5 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + make_1 AS make, + model_1 AS model, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 5 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql index 3ea00bc25..3ef87cc68 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql @@ -6,15 +6,24 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id +), _t0 AS ( + SELECT + cars.make AS make_1, + cars.model AS model_1, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, + _s1.n_rows + FROM main.cars AS cars + LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id 
+ ORDER BY + COALESCE(_s1.sum_sale_price, 0) DESC + LIMIT 5 ) SELECT - cars.make, - cars.model, - COALESCE(_s1.n_rows, 0) AS total_sales, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue -FROM main.cars AS cars -LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id + make_1 AS make, + model_1 AS model, + COALESCE(n_rows, 0) AS total_sales, + total_revenue_1 AS total_revenue +FROM _t0 ORDER BY - total_revenue DESC -LIMIT 5 + total_revenue_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql index af2c3f84f..d9bb1d546 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_ansi.sql @@ -17,4 +17,4 @@ FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid ORDER BY - total_duration DESC + COALESCE(_s1.sum_duration, 0) DESC diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql index 93fddddfd..c8a248291 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv11_sqlite.sql @@ -23,4 +23,4 @@ FROM main.users AS users JOIN _s1 AS _s1 ON _s1.user_id = users.uid ORDER BY - total_duration DESC + COALESCE(_s1.sum_duration, 0) DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 747d8b6d3..4e4bdb420 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,14 +9,22 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id +), _t0 AS ( + SELECT + merchants.name AS name_1, + COALESCE(_s1.sum_amount, 0) AS total_amount_1, + _s1.n_rows + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + ORDER BY + COALESCE(_s1.sum_amount, 0) DESC + LIMIT 2 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS 
total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid + name_1 AS merchant_name, + COALESCE(n_rows, 0) AS total_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 2 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index 73a50f555..ed3cfe9ea 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,14 +9,22 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id +), _t0 AS ( + SELECT + merchants.name AS name_1, + COALESCE(_s1.sum_amount, 0) AS total_amount_1, + _s1.n_rows + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid + ORDER BY + COALESCE(_s1.sum_amount, 0) DESC + LIMIT 2 ) SELECT - merchants.name AS merchant_name, - COALESCE(_s1.n_rows, 0) AS total_transactions, - COALESCE(_s1.sum_amount, 0) AS total_amount -FROM main.merchants AS merchants -LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid + name_1 AS merchant_name, + COALESCE(n_rows, 0) AS total_transactions, + total_amount_1 AS total_amount +FROM _t0 ORDER BY - total_amount DESC -LIMIT 2 + total_amount_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index c605cd315..672017439 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id +), _t0 AS ( + SELECT + coupons.code AS code_1, + COALESCE(_s1.count_txid, 0) AS redemption_count_1, + _s1.sum_amount + FROM main.coupons AS coupons + LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid + ORDER BY + COALESCE(_s1.count_txid, 0) DESC + LIMIT 3 ) SELECT - 
coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount -FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid + code_1 AS coupon_code, + redemption_count_1 AS redemption_count, + COALESCE(sum_amount, 0) AS total_discount +FROM _t0 ORDER BY - redemption_count DESC -LIMIT 3 + redemption_count_1 DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index c605cd315..672017439 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -6,14 +6,22 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id +), _t0 AS ( + SELECT + coupons.code AS code_1, + COALESCE(_s1.count_txid, 0) AS redemption_count_1, + _s1.sum_amount + FROM main.coupons AS coupons + LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid + ORDER BY + COALESCE(_s1.count_txid, 0) DESC + LIMIT 3 ) SELECT - coupons.code AS coupon_code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - COALESCE(_s1.sum_amount, 0) AS total_discount -FROM main.coupons AS coupons -LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid + code_1 AS coupon_code, + redemption_count_1 AS redemption_count, + COALESCE(sum_amount, 0) AS total_discount +FROM _t0 ORDER BY - redemption_count DESC -LIMIT 3 + redemption_count_1 DESC diff --git a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql index f9e924c2b..abf9e60c6 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql @@ -3,30 +3,40 @@ WITH _s2 AS ( ev_dt, ev_key FROM events +), _t0 AS ( + SELECT + eras.er_name AS er_name_1, + events.ev_name AS ev_name_1, + seasons.s_name AS s_name_1, + times.t_name AS t_name_1, + events.ev_dt + FROM events AS events + JOIN eras AS eras + ON 
eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) + AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) + JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key + JOIN seasons AS seasons + ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key + JOIN times AS times + ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) + AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) + WHERE + events.ev_typ = 'culture' + ORDER BY + ev_dt + LIMIT 6 ) SELECT - events.ev_name AS event_name, - eras.er_name AS era_name, - EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) AS event_year, - seasons.s_name AS season_name, - times.t_name AS tod -FROM events AS events -JOIN eras AS eras - ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) - AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) -JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key -JOIN seasons AS seasons - ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) -JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key -JOIN times AS times - ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) - AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) -WHERE - events.ev_typ = 'culture' + ev_name_1 AS event_name, + er_name_1 AS era_name, + EXTRACT(YEAR FROM CAST(ev_dt AS DATETIME)) AS event_year, + s_name_1 AS season_name, + t_name_1 AS tod +FROM _t0 ORDER BY - events.ev_dt -LIMIT 6 + ev_dt diff --git a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql index 
25f7ff28f..972bff76a 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql @@ -3,30 +3,40 @@ WITH _s2 AS ( ev_dt, ev_key FROM events +), _t0 AS ( + SELECT + eras.er_name AS er_name_1, + events.ev_name AS ev_name_1, + seasons.s_name AS s_name_1, + times.t_name AS t_name_1, + events.ev_dt + FROM events AS events + JOIN eras AS eras + ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) + AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) + JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key + JOIN seasons AS seasons + ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key + JOIN times AS times + ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) + AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) + WHERE + events.ev_typ = 'culture' + ORDER BY + ev_dt + LIMIT 6 ) SELECT - events.ev_name AS event_name, - eras.er_name AS era_name, - CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) AS event_year, - seasons.s_name AS season_name, - times.t_name AS tod -FROM events AS events -JOIN eras AS eras - ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) - AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) -JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key -JOIN seasons AS seasons - ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) -JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key -JOIN times AS times - ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) - AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) -WHERE - events.ev_typ = 
'culture' + ev_name_1 AS event_name, + er_name_1 AS era_name, + CAST(STRFTIME('%Y', ev_dt) AS INTEGER) AS event_year, + s_name_1 AS season_name, + t_name_1 AS tod +FROM _t0 ORDER BY - events.ev_dt -LIMIT 6 + ev_dt diff --git a/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql b/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql index 4c8eab137..84cd80284 100644 --- a/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql +++ b/tests/test_sql_refsols/epoch_pct_searches_per_tod_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(times.t_name) AS anything_t_name, ANY_VALUE(times.t_start_hour) AS anything_t_start_hour, @@ -9,18 +9,12 @@ WITH _t1 AS ( AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(searches.search_ts AS DATETIME)) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(( - 100.0 * n_rows - ) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches FROM _t0 ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql b/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql index 2ba2f91ca..d3e7dd78f 100644 --- a/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql +++ b/tests/test_sql_refsols/epoch_pct_searches_per_tod_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT MAX(times.t_name) AS anything_t_name, MAX(times.t_start_hour) AS anything_t_start_hour, @@ -9,18 +9,12 @@ WITH _t1 AS ( AND times.t_start_hour <= CAST(STRFTIME('%H', searches.search_ts) AS INTEGER) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(CAST(( - 100.0 * n_rows - ) AS REAL) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches + ROUND(CAST(( + 100.0 * n_rows + ) AS REAL) / SUM(n_rows) OVER (), 2) AS pct_searches FROM _t0 
ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql b/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql index 61ae5d2ab..c7e546d2a 100644 --- a/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql +++ b/tests/test_sql_refsols/epoch_search_results_by_tod_ansi.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT ANY_VALUE(times.t_name) AS anything_t_name, ANY_VALUE(times.t_start_hour) AS anything_t_start_hour, @@ -10,20 +10,13 @@ WITH _t1 AS ( AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(searches.search_ts AS DATETIME)) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(avg_search_num_results, 2) AS avg_results, - ROUND(( - 100.0 * n_rows - ) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches, - avg_results + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches, + ROUND(avg_search_num_results, 2) AS avg_results FROM _t0 ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql b/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql index 9cb23ba2b..1c7a5d778 100644 --- a/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql +++ b/tests/test_sql_refsols/epoch_search_results_by_tod_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t1 AS ( +WITH _t0 AS ( SELECT MAX(times.t_name) AS anything_t_name, MAX(times.t_start_hour) AS anything_t_start_hour, @@ -10,20 +10,13 @@ WITH _t1 AS ( AND times.t_start_hour <= CAST(STRFTIME('%H', searches.search_ts) AS INTEGER) GROUP BY times.t_name -), _t0 AS ( - SELECT - ROUND(avg_search_num_results, 2) AS avg_results, - ROUND(CAST(( - 100.0 * n_rows - ) AS REAL) / SUM(n_rows) OVER (), 2) AS pct_searches, - anything_t_name, - anything_t_start_hour - FROM _t1 ) SELECT anything_t_name AS tod, - pct_searches, - avg_results + ROUND(CAST(( + 100.0 * n_rows + ) AS REAL) / SUM(n_rows) OVER (), 2) AS 
pct_searches, + ROUND(avg_search_num_results, 2) AS avg_results FROM _t0 ORDER BY anything_t_start_hour diff --git a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql index f04f7a41b..2d82d40ff 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql @@ -1,9 +1,19 @@ +WITH _t0 AS ( + SELECT + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost_1, + ps_availqty, + ps_partkey, + ps_suppkey + FROM tpch.partsupp + ORDER BY + CEIL(ps_supplycost * FLOOR(ps_availqty)) DESC + LIMIT 10 +) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, FLOOR(ps_availqty) AS complete_parts, - CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost -FROM tpch.partsupp + total_cost_1 AS total_cost +FROM _t0 ORDER BY - total_cost DESC -LIMIT 10 + total_cost_1 DESC diff --git a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql index c72007f73..251ef1ebf 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql @@ -1,19 +1,39 @@ +WITH _t0 AS ( + SELECT + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END AS total_cost_1, + ps_availqty, + ps_partkey, + ps_suppkey + FROM tpch.partsupp + ORDER BY + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 
0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END DESC + LIMIT 10 +) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END AS complete_parts, - CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) + CASE - WHEN CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) < ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) - THEN 1 - ELSE 0 - END AS total_cost -FROM tpch.partsupp + total_cost_1 AS total_cost +FROM _t0 ORDER BY - total_cost DESC -LIMIT 10 + total_cost_1 DESC diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql index 45f15dc10..d900cffd0 100644 --- a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_ansi.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT in_device_id FROM main.incidents @@ -6,7 +6,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s3 AS ( @@ -23,7 +23,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s7 AS ( @@ -40,7 +40,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s13 AS ( diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql index 93c5d04ef..9d1838628 100644 --- 
a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT in_device_id FROM main.incidents @@ -6,7 +6,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s3 AS ( @@ -23,7 +23,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s7 AS ( @@ -40,7 +40,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, in_device_id - FROM _t3 + FROM _t2 GROUP BY in_device_id ), _s13 AS ( diff --git a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql index 4d76d5bdb..f96313a8a 100644 --- a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_ansi.sql @@ -9,19 +9,16 @@ WITH _s5 AS ( ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' GROUP BY incidents.in_error_id -), _t0 AS ( - SELECT - ROUND(( - 100.0 * COALESCE(_s5.n_rows, 0) - ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) AS pct, - errors.er_name - FROM main.errors AS errors - LEFT JOIN _s5 AS _s5 - ON _s5.in_error_id = errors.er_id ) SELECT - er_name AS error, - pct -FROM _t0 + errors.er_name AS error, + ROUND(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) AS pct +FROM main.errors AS errors +LEFT JOIN _s5 AS _s5 + ON _s5.in_error_id = errors.er_id ORDER BY - pct DESC + ROUND(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) DESC diff --git a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql index 410c09ec0..cc033e6bb 100644 --- 
a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_sqlite.sql @@ -9,22 +9,22 @@ WITH _s5 AS ( ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' GROUP BY incidents.in_error_id -), _t0 AS ( - SELECT - ROUND( - CAST(( - 100.0 * COALESCE(_s5.n_rows, 0) - ) AS REAL) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), - 2 - ) AS pct, - errors.er_name - FROM main.errors AS errors - LEFT JOIN _s5 AS _s5 - ON _s5.in_error_id = errors.er_id ) SELECT - er_name AS error, - pct -FROM _t0 + errors.er_name AS error, + ROUND( + CAST(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) AS REAL) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), + 2 + ) AS pct +FROM main.errors AS errors +LEFT JOIN _s5 AS _s5 + ON _s5.in_error_id = errors.er_id ORDER BY - pct DESC + ROUND( + CAST(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) AS REAL) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), + 2 + ) DESC diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 211f66449..3c42321e0 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t5 AS ( +WITH _t3 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t8 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -14,26 +14,26 @@ WITH _t5 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t5 AS _t7 + _t6.ca_dt + FROM _t3 AS _t6 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t7.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t6.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 
+ ON _t7.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _t6.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _t10.ca_dt - FROM _t5 AS _t10 + FROM _t3 AS _t10 JOIN main.incidents AS incidents ON _t10.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t11 + JOIN _t7 AS _t11 ON _t11.co_id = devices.de_production_country_id GROUP BY _t10.ca_dt @@ -41,11 +41,11 @@ WITH _t5 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t5 AS _t5 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t5.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t5.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t5.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index e896b36ac..1c08c7d2e 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t5 AS ( +WITH _t3 AS ( 
SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t8 AS ( +), _t7 AS ( SELECT co_id, co_name @@ -14,26 +14,26 @@ WITH _t5 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t7.ca_dt - FROM _t5 AS _t7 + _t6.ca_dt + FROM _t3 AS _t6 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t7.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_t6.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t8 AS _t8 - ON _t8.co_id = devices.de_production_country_id + JOIN _t7 AS _t7 + ON _t7.co_id = devices.de_production_country_id GROUP BY - _t7.ca_dt + _t6.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, _t10.ca_dt - FROM _t5 AS _t10 + FROM _t3 AS _t10 JOIN main.incidents AS incidents ON _t10.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t8 AS _t11 + JOIN _t7 AS _t11 ON _t11.co_id = devices.de_production_country_id GROUP BY _t10.ca_dt @@ -41,11 +41,11 @@ WITH _t5 AS ( SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t5 AS _t5 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t5.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t5.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - CAST(STRFTIME('%m', _t5.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t5.ca_dt) AS INTEGER) + 
CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index 292b7386e..d56b6a7e2 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t9 AS ( +), _t7 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t9 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t7 AS _t7 + ON _t7.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t9 AS _t11 - ON _t11.pr_id = devices.de_product_id + JOIN _t7 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( @@ -51,42 +51,33 @@ WITH _s14 AS ( ON _s13.ca_dt = _s6.ca_dt GROUP BY EXTRACT(YEAR FROM CAST(_s6.ca_dt AS DATETIME)) -), _t0 AS ( - SELECT - ROUND( - SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - ( - 100.0 * ( - COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) - ) - ) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), - 2 - ) AS pct_bought_change, - ROUND( - ( - 100.0 * ( - COALESCE(_s15.sum_expr_4, 0) - 
LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) - ) - ) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), - 2 - ) AS pct_incident_change, - _s15.year - EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) AS years_since_release, - COALESCE(_s15.sum_n_rows, 0) AS n_devices, - COALESCE(_s15.sum_expr_4, 0) AS n_incidents - FROM _s14 AS _s14 - JOIN _s15 AS _s15 - ON _s15.year >= EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) ) SELECT - years_since_release, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + _s15.year - EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) AS years_since_release, + ROUND( + SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + ( + 100.0 * ( + COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) + ) + ) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), + 2 + ) AS pct_bought_change, + ROUND( + ( + 100.0 * ( + COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST) + ) + ) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year NULLS LAST), + 2 + ) AS pct_incident_change, + COALESCE(_s15.sum_n_rows, 0) AS bought, + COALESCE(_s15.sum_expr_4, 0) AS incidents +FROM _s14 AS _s14 +JOIN _s15 AS _s15 + ON _s15.year >= EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) ORDER BY - years_since_release + _s15.year - EXTRACT(YEAR FROM CAST(_s14.release_date AS DATETIME)) diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index 
4becabea9..40f2461ce 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t9 AS ( +), _t7 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t9 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t7 AS _t7 + ON _t7.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t9 AS _t11 - ON _t11.pr_id = devices.de_product_id + JOIN _t7 AS _t9 + ON _t9.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( @@ -51,42 +51,33 @@ WITH _s14 AS ( ON _s13.ca_dt = _s6.ca_dt GROUP BY CAST(STRFTIME('%Y', _s6.ca_dt) AS INTEGER) -), _t0 AS ( - SELECT - ROUND( - CAST(SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - CAST(( - 100.0 * ( - COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year) - ) - ) AS REAL) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year), - 2 - ) AS pct_bought_change, - ROUND( - CAST(( - 100.0 * ( - COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year) - ) - ) AS REAL) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year), - 2 - ) AS pct_incident_change, - _s15.year - CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) AS years_since_release, - COALESCE(_s15.sum_n_rows, 0) AS n_devices, - COALESCE(_s15.sum_expr_4, 0) AS n_incidents - 
FROM _s14 AS _s14 - JOIN _s15 AS _s15 - ON _s15.year >= CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) ) SELECT - years_since_release, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + _s15.year - CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) AS years_since_release, + ROUND( + CAST(SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + CAST(( + 100.0 * ( + COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year) + ) + ) AS REAL) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year), + 2 + ) AS pct_bought_change, + ROUND( + CAST(( + 100.0 * ( + COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year) + ) + ) AS REAL) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year), + 2 + ) AS pct_incident_change, + COALESCE(_s15.sum_n_rows, 0) AS bought, + COALESCE(_s15.sum_expr_4, 0) AS incidents +FROM _s14 AS _s14 +JOIN _s15 AS _s15 + ON _s15.year >= CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) ORDER BY - years_since_release + _s15.year - CAST(STRFTIME('%Y', _s14.release_date) AS INTEGER) diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index 337d437df..1f40c54cb 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY _s4.ca_dt -), _t4 AS ( +), _t2 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ 
-32,42 +32,33 @@ WITH _s2 AS ( ON _s2.ca_dt = _s7.ca_dt GROUP BY EXTRACT(YEAR FROM CAST(_s2.ca_dt AS DATETIME)) -), _t0 AS ( - SELECT - ROUND( - SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - ( - 100.0 * ( - COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST) - ) - ) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST), - 2 - ) AS pct_bought_change, - ROUND( - ( - 100.0 * ( - COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST) - ) - ) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST), - 2 - ) AS pct_incident_change, - COALESCE(sum_expr_3, 0) AS n_devices, - COALESCE(sum_n_rows, 0) AS n_incidents, - year - FROM _t4 - WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) SELECT year AS yr, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + ROUND( + SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + ( + 100.0 * ( + COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST) + ) + ) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year NULLS LAST), + 2 + ) AS pct_bought_change, + ROUND( + ( + 100.0 * ( + COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST) + ) + ) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year NULLS LAST), + 2 + ) AS pct_incident_change, + COALESCE(sum_expr_3, 0) AS bought, + COALESCE(sum_n_rows, 0) AS incidents +FROM _t2 +WHERE + NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY year diff --git 
a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 61f47822c..53b91a35c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY _s4.ca_dt -), _t4 AS ( +), _t2 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ -32,42 +32,33 @@ WITH _s2 AS ( ON _s2.ca_dt = _s7.ca_dt GROUP BY CAST(STRFTIME('%Y', _s2.ca_dt) AS INTEGER) -), _t0 AS ( - SELECT - ROUND( - CAST(SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - 2 - ) AS cum_ir, - ROUND( - CAST(( - 100.0 * ( - COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year) - ) - ) AS REAL) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year), - 2 - ) AS pct_bought_change, - ROUND( - CAST(( - 100.0 * ( - COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year) - ) - ) AS REAL) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year), - 2 - ) AS pct_incident_change, - COALESCE(sum_expr_3, 0) AS n_devices, - COALESCE(sum_n_rows, 0) AS n_incidents, - year - FROM _t4 - WHERE - NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ) SELECT year AS yr, - cum_ir, - pct_bought_change, - pct_incident_change, - n_devices AS bought, - n_incidents AS incidents -FROM _t0 + ROUND( + CAST(SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS REAL) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + CAST(( + 100.0 * ( + COALESCE(sum_expr_3, 0) - 
LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year) + ) + ) AS REAL) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year), + 2 + ) AS pct_bought_change, + ROUND( + CAST(( + 100.0 * ( + COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year) + ) + ) AS REAL) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year), + 2 + ) AS pct_incident_change, + COALESCE(sum_expr_3, 0) AS bought, + COALESCE(sum_n_rows, 0) AS incidents +FROM _t2 +WHERE + NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY year diff --git a/tests/test_sql_refsols/time_threshold_reached_ansi.sql b/tests/test_sql_refsols/time_threshold_reached_ansi.sql index 08eebf4aa..853086170 100644 --- a/tests/test_sql_refsols/time_threshold_reached_ansi.sql +++ b/tests/test_sql_refsols/time_threshold_reached_ansi.sql @@ -3,8 +3,8 @@ WITH _t3 AS ( ( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) / SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, - DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day_1, - sbtxdatetime + sbtxdatetime, + DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 @@ -15,7 +15,7 @@ WITH _t3 AS ( WHERE pct_of_day >= 50.0 QUALIFY - ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day NULLS LAST) = 1 + ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day NULLS LAST) = 1 ) SELECT sbtxdatetime AS date_time diff --git a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql index c69bb3b85..5e47efb24 100644 --- a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql +++ b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql @@ -3,15 +3,15 @@ WITH _t3 AS ( CAST(( 100.0 * SUM(sbtxshares) OVER (PARTITION BY 
DATE(sbtxdatetime, 'start of day') ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) AS REAL) / SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day')) AS pct_of_day, - DATE(sbtxdatetime, 'start of day') AS txn_day_1, - sbtxdatetime + sbtxdatetime, + DATE(sbtxdatetime, 'start of day') AS txn_day FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 ), _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (PARTITION BY txn_day_1 ORDER BY pct_of_day) AS _w + ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day) AS _w FROM _t3 WHERE pct_of_day >= 50.0 diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index f76f36d96..53460ad26 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -35,9 +35,7 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr_2, 0) + ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index f76f36d96..53460ad26 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -35,9 +35,7 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr_2, 0) + ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index 8bee61545..fdfcf8468 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _t5 AS ( SELECT l_discount, l_extendedprice, @@ -14,7 +14,7 @@ WITH _t6 AS ( 1 - l_discount )) AS 
sum_expr_2, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ), _s2 AS ( @@ -28,11 +28,8 @@ WITH _t6 AS ( SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr_3, - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_expr_3_1, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ) @@ -45,7 +42,7 @@ SELECT FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) AND _s5.l_suppkey = supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index 25340d6a8..4b6f85ec0 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t6 AS ( +WITH _t5 AS ( SELECT l_discount, l_extendedprice, @@ -13,7 +13,7 @@ WITH _t6 AS ( 1 - l_discount )) AS sum_expr_2, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ), _s2 AS ( @@ -27,11 +27,8 @@ WITH _t6 AS ( SUM(l_extendedprice * ( 1 - l_discount )) AS sum_expr_3, - SUM(l_extendedprice * ( - 1 - l_discount - )) AS sum_expr_3_1, l_suppkey - FROM _t6 + FROM _t5 GROUP BY l_suppkey ) @@ -44,7 +41,7 @@ SELECT FROM _s2 AS _s2 CROSS JOIN tpch.supplier AS supplier JOIN _s5 AS _s5 - ON _s2.max_revenue = COALESCE(_s5.sum_expr_3_1, 0) + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) AND _s5.l_suppkey = supplier.s_suppkey ORDER BY s_suppkey diff --git a/tests/test_sql_refsols/tpch_q5_ansi.sql b/tests/test_sql_refsols/tpch_q5_ansi.sql index 200918b17..c97200d61 100644 --- a/tests/test_sql_refsols/tpch_q5_ansi.sql +++ b/tests/test_sql_refsols/tpch_q5_ansi.sql @@ -27,4 +27,6 @@ JOIN _s11 AS _s11 GROUP BY nation.n_nationkey ORDER BY - revenue DESC + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC diff --git a/tests/test_sql_refsols/tpch_q5_sqlite.sql b/tests/test_sql_refsols/tpch_q5_sqlite.sql index d65176b51..6cea8d423 100644 --- 
a/tests/test_sql_refsols/tpch_q5_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q5_sqlite.sql @@ -27,4 +27,6 @@ JOIN _s11 AS _s11 GROUP BY nation.n_nationkey ORDER BY - revenue DESC + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC diff --git a/tests/test_sql_refsols/tpch_q9_ansi.sql b/tests/test_sql_refsols/tpch_q9_ansi.sql index 12fe871b0..99390991a 100644 --- a/tests/test_sql_refsols/tpch_q9_ansi.sql +++ b/tests/test_sql_refsols/tpch_q9_ansi.sql @@ -1,30 +1,37 @@ -SELECT - nation.n_name AS NATION, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS O_YEAR, - COALESCE( +WITH _t0 AS ( + SELECT SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ), - 0 - ) AS AMOUNT -FROM tpch.lineitem AS lineitem -JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' -JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey -JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey -JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey -JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = partsupp.ps_partkey - AND lineitem.l_suppkey = partsupp.ps_suppkey -GROUP BY - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) + ) AS sum_value, + nation.n_name, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS o_year + FROM tpch.lineitem AS lineitem + JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' + JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey + JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey + JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + nation.n_name, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS 
DATETIME)) + ORDER BY + n_name, + o_year DESC + LIMIT 10 +) +SELECT + n_name AS NATION, + o_year AS O_YEAR, + COALESCE(sum_value, 0) AS AMOUNT +FROM _t0 ORDER BY - nation.n_name, + n_name, o_year DESC -LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q9_sqlite.sql b/tests/test_sql_refsols/tpch_q9_sqlite.sql index 37e726db9..ac17a15d1 100644 --- a/tests/test_sql_refsols/tpch_q9_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q9_sqlite.sql @@ -1,30 +1,37 @@ -SELECT - nation.n_name AS NATION, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS O_YEAR, - COALESCE( +WITH _t0 AS ( + SELECT SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ), - 0 - ) AS AMOUNT -FROM tpch.lineitem AS lineitem -JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' -JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey -JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey -JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey -JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = partsupp.ps_partkey - AND lineitem.l_suppkey = partsupp.ps_suppkey -GROUP BY - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) + ) AS sum_value, + nation.n_name, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS o_year + FROM tpch.lineitem AS lineitem + JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' + JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey + JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey + JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + nation.n_name, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) + ORDER BY + n_name, + o_year DESC + LIMIT 10 +) 
+SELECT + n_name AS NATION, + o_year AS O_YEAR, + COALESCE(sum_value, 0) AS AMOUNT +FROM _t0 ORDER BY - nation.n_name, + n_name, o_year DESC -LIMIT 10 From a678974407712fa44b3d48d5a61028bb3cd8d0a1 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 03:05:11 -0400 Subject: [PATCH 12/97] Resolving conflicts --- pydough/pydough_operators/type_inference/type_verifier.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pydough/pydough_operators/type_inference/type_verifier.py b/pydough/pydough_operators/type_inference/type_verifier.py index 79d2900b5..092621c49 100644 --- a/pydough/pydough_operators/type_inference/type_verifier.py +++ b/pydough/pydough_operators/type_inference/type_verifier.py @@ -15,10 +15,9 @@ from abc import ABC, abstractmethod from typing import Any -from pydough.errors import PyDoughQDAGException +from pydough.errors import PyDoughMetadataException, PyDoughQDAGException from pydough.errors.error_utils import ( NoExtraKeys, - PyDoughMetadataException, extract_array, extract_integer, extract_string, From d7ec696fe9e841a6b567349ecb2cb3bbabdf1d2a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 03:13:36 -0400 Subject: [PATCH 13/97] Adding extra round of bubbling --- pydough/conversion/relational_converter.py | 3 ++ tests/test_plan_refsols/aggregate_semi.txt | 13 +++--- .../aggregate_then_backref.txt | 11 +++-- .../aggregation_analytics_2.txt | 27 ++++++------ .../aggregation_analytics_3.txt | 27 ++++++------ tests/test_plan_refsols/bad_child_reuse_1.txt | 15 ++++--- tests/test_plan_refsols/bad_child_reuse_5.txt | 11 +++-- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_aq.txt | 18 ++++---- tests/test_plan_refsols/common_prefix_b.txt | 11 +++-- tests/test_plan_refsols/common_prefix_c.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_d.txt | 41 +++++++++---------- tests/test_plan_refsols/common_prefix_f.txt | 11 +++-- 
tests/test_plan_refsols/common_prefix_g.txt | 11 +++-- tests/test_plan_refsols/common_prefix_h.txt | 21 +++++----- tests/test_plan_refsols/common_prefix_j.txt | 9 ++-- tests/test_plan_refsols/common_prefix_k.txt | 9 ++-- tests/test_plan_refsols/common_prefix_l.txt | 31 +++++++------- tests/test_plan_refsols/common_prefix_m.txt | 36 ++++++++-------- tests/test_plan_refsols/common_prefix_o.txt | 15 ++++--- tests/test_plan_refsols/common_prefix_p.txt | 17 ++++---- tests/test_plan_refsols/common_prefix_q.txt | 4 +- tests/test_plan_refsols/common_prefix_r.txt | 4 +- tests/test_plan_refsols/common_prefix_v.txt | 11 +++-- tests/test_plan_refsols/common_prefix_w.txt | 13 +++--- tests/test_plan_refsols/correl_14.txt | 23 +++++------ tests/test_plan_refsols/correl_15.txt | 30 +++++++------- tests/test_plan_refsols/correl_24.txt | 4 +- tests/test_plan_refsols/correl_26.txt | 21 +++++----- tests/test_plan_refsols/correl_27.txt | 19 ++++----- tests/test_plan_refsols/correl_28.txt | 15 ++++--- tests/test_plan_refsols/correl_29.txt | 33 ++++++++------- tests/test_plan_refsols/correl_30.txt | 23 +++++------ tests/test_plan_refsols/correl_31.txt | 27 ++++++------ tests/test_plan_refsols/correl_32.txt | 4 +- tests/test_plan_refsols/correl_34.txt | 15 ++++--- .../count_cust_supplier_nation_combos.txt | 13 +++--- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 4 +- .../test_plan_refsols/deep_best_analysis.txt | 4 +- .../epoch_culture_events_info.txt | 30 ++++++-------- tests/test_plan_refsols/exponentiation.txt | 4 +- tests/test_plan_refsols/floor_and_ceil_2.txt | 4 +- ...lineitems_access_cust_supplier_nations.txt | 17 ++++---- .../lines_shipping_vs_customer_region.txt | 22 +++++----- .../mostly_positive_accounts_per_nation3.txt | 13 +++--- ...ple_simple_aggregations_multiple_calcs.txt | 17 ++++---- .../num_positive_accounts_per_nation.txt | 13 +++--- .../orders_versus_first_orders.txt | 4 +- tests/test_plan_refsols/part_reduced_size.txt | 11 +++-- 
.../parts_quantity_increase_95_96.txt | 4 +- .../rank_nations_per_region_by_customers.txt | 4 +- ...rank_parts_per_supplier_region_by_size.txt | 4 +- tests/test_plan_refsols/singular4.txt | 15 ++++--- tests/test_plan_refsols/singular7.txt | 29 +++++++------ .../sqlite_udf_count_epsilon.txt | 17 ++++---- .../sqlite_udf_covar_pop.txt | 17 ++++---- .../test_plan_refsols/sqlite_udf_decode3.txt | 9 ++-- .../sqlite_udf_format_datetime.txt | 7 ++-- tests/test_plan_refsols/sqlite_udf_gcat.txt | 5 +-- tests/test_plan_refsols/sqlite_udf_nval.txt | 9 ++-- .../sqlite_udf_percent_positive.txt | 24 +++++------ tests/test_plan_refsols/sqlite_udf_relmin.txt | 11 +++-- .../test_plan_refsols/supplier_best_part.txt | 14 +++---- .../supplier_pct_national_qty.txt | 4 +- .../test_plan_refsols/suppliers_bal_diffs.txt | 4 +- ...chnograph_country_combination_analysis.txt | 4 +- ...hnograph_incident_rate_by_release_year.txt | 9 ++-- .../technograph_monthly_incident_rate.txt | 33 ++++++++------- .../technograph_most_unreliable_products.txt | 4 +- ...umulative_incident_rate_goldcopperstar.txt | 29 +++++++------ ..._year_cumulative_incident_rate_overall.txt | 19 ++++----- tests/test_plan_refsols/tpch_q10.txt | 25 ++++++----- tests/test_plan_refsols/tpch_q2.txt | 11 +++-- tests/test_plan_refsols/tpch_q20.txt | 15 ++++--- tests/test_plan_refsols/tpch_q5.txt | 25 ++++++----- .../various_aggfuncs_simple.txt | 7 ++-- .../window_sliding_frame_relsize.txt | 4 +- .../window_sliding_frame_relsum.txt | 4 +- .../years_months_days_hours_datediff.txt | 4 +- tests/test_sql_refsols/datediff_ansi.sql | 10 ++--- tests/test_sql_refsols/datediff_sqlite.sql | 10 ++--- .../defog_broker_basic3_ansi.sql | 14 +++---- .../defog_broker_basic3_sqlite.sql | 14 +++---- .../defog_dealership_basic10_ansi.sql | 18 ++++---- .../defog_dealership_basic10_sqlite.sql | 18 ++++---- .../defog_dealership_basic5_ansi.sql | 12 +++--- .../defog_dealership_basic5_sqlite.sql | 12 +++--- .../defog_dealership_basic8_ansi.sql | 18 ++++---- 
.../defog_dealership_basic8_sqlite.sql | 18 ++++---- .../defog_ewallet_basic10_ansi.sql | 14 +++---- .../defog_ewallet_basic10_sqlite.sql | 14 +++---- .../defog_ewallet_basic8_ansi.sql | 12 +++--- .../defog_ewallet_basic8_sqlite.sql | 12 +++--- .../epoch_culture_events_info_ansi.sql | 18 ++++---- .../epoch_culture_events_info_sqlite.sql | 18 ++++---- .../floor_and_ceil_2_ansi.sql | 10 ++--- .../floor_and_ceil_2_sqlite.sql | 24 ++++------- .../sqlite_udf_count_epsilon_sqlite.sql | 8 ++-- .../sqlite_udf_decode3_sqlite.sql | 16 ++++++-- .../sqlite_udf_format_datetime_sqlite.sql | 13 +++++- .../sqlite_udf_gcat_sqlite.sql | 16 ++------ .../sqlite_udf_nval_sqlite.sql | 32 +++++---------- .../sqlite_udf_percent_positive_sqlite.sql | 22 ++++------ tests/test_sql_refsols/tpch_q20_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 4 +- 106 files changed, 717 insertions(+), 802 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index d4e43f901..66754ba96 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1456,6 +1456,9 @@ def optimize_relational_tree( # Step 10: re-run projection merging, without pushing into joins. root = confirm_root(merge_projects(root, push_into_joins=False)) + # Step 8: re-run column bubbling + root = bubble_column_names(root) + # Step 11: re-run column pruning. 
root = ColumnPruner().prune_unused_columns(root) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index f8dbb71a3..8b74b522a 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - PROJECT(columns={'avg_p_retailprice_1': avg_p_retailprice, 'n_rows': n_rows, 'ps_suppkey': ps_suppkey, 'sum_p_retailprice': sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 5a6627a0f..8fa56b648 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,8 +1,7 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - PROJECT(columns={'o_orderkey_1': o_orderkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 365ac3d0e..a3fc5b678 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,18 +1,17 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=4, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, orderings=[(revenue_generated):asc_first, (p_name):asc_first]) PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & 
t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 82e33e815..8071a16f2 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,18 +1,17 @@ ROOT(columns=[('part_name', p_name), 
('revenue_ratio', revenue_ratio)], orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) LIMIT(limit=Literal(value=3, type=NumericType()), columns={'p_name': p_name, 'revenue_ratio': revenue_ratio}, orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) PROJECT(columns={'p_name': p_name, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - PROJECT(columns={'anything_ps_partkey_1': anything_ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - 
FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index e5ed44130..df12efecf 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('cust_key', c_custkey_1), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows_1 > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey_1, 'n_rows': n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows_1, 'n_rows_1': n_rows}, orderings=[(c_acctbal):desc_last]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': 
t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index fe46e16ce..98c75eec1 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,9 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey_1, 'n_rows': n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey_1': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': 
t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index c20c51d81..240fcb590 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders_1, 'n_rows': t0.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders_1': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index e8d037b92..9f93ea84e 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,15 +1,13 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t1.s_name_1}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 
's_name_1': s_name, 's_nationkey_1': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name_1, 'ps_availqty': t1.ps_availqty_1, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - PROJECT(columns={'p_name_1': p_name, 'ps_availqty_1': ps_availqty, 'ps_suppkey_1': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': 
t0.ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 6fdd01f9f..4d1bb2447 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -3,12 +3,11 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': 
t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 34dc71b2e..953b939f0 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,18 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': 
sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index cb55fd26f..6676f3f68 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,29 +1,26 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': 
t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_1': sum_agg_1, 'sum_agg_29': sum_agg_29, 'sum_n_rows_1': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'agg_29': agg_29, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_expr_10': t0.sum_expr_10_1, 'sum_expr_7': t0.sum_expr_7_1, 'sum_n_rows': t0.sum_n_rows_1}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_expr_10_1': sum_expr_10, 'sum_expr_7_1': sum_expr_7, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': 
t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey_1, 'expr_10': t0.n_rows_1, 'expr_7': t0.expr_7_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_nationkey_1': c_nationkey, 'expr_7_1': expr_7, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey_1, 'expr_7': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey_1': c_nationkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': 
SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows, 'sum_n_rows_2': sum_n_rows_2}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index 20bfbcac7..dd8ca64e5 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -3,12 +3,11 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t1.sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 4629f2fa8..848a95bb5 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -3,12 +3,11 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'n_suppliers': t1.n_suppliers}) - 
PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 1a8005c46..cda098921 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,18 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 
'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'sum_agg_0': sum_agg_0, 'sum_n_rows_1': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows_1': sum_sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey_1, 'n_rows': t0.n_rows_1, 'sum_n_rows': t0.sum_n_rows_1, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows_1': n_rows, 'sum_n_rows_1': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 
'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'expr_18_0': 
1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index db722c70a..a338a9b8e 100644 --- a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,8 +1,7 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 28f7e96d9..4c3a0abf6 100644 --- 
a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,8 +1,7 @@ ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name_1, 'r_name': t1.r_name_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index d8911515c..027e4d8e7 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,19 +1,18 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), 
('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal_1, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows_1, 'sum_s_acctbal': t1.sum_s_acctbal_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows_1': n_rows, 'sum_s_acctbal_1': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, 
columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 24dfe2447..f91eee7b1 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,20 +1,18 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal_1), ('selected_suppliers_max', max_s_acctbal_1), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name_1)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_rows': n_rows, 
'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal_1, 'min_s_acctbal': t1.min_s_acctbal_1, 'n_name': t1.n_name_1, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_regionkey_1 == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal_1, 'min_s_acctbal': t0.min_s_acctbal_1, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - PROJECT(columns={'avg_s_acctbal': 
avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 'min_s_acctbal_1': min_s_acctbal, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 14fc1ed20..907f12e2a 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -10,14 +10,13 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5_1, 'l_orderkey': t0.l_orderkey_1, 'l_suppkey': t0.l_suppkey_1, 'p_retailprice': t1.p_retailprice}) - PROJECT(columns={'agg_5_1': agg_5, 'l_orderkey_1': l_orderkey, 'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 
'p_partkey': p_partkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 9b9ef4933..3d779be70 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3_1):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3_1': ordering_3}, orderings=[(ordering_3):asc_first, 
(c_name):asc_first]) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3):asc_first, (c_name):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) PROJECT(columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.c_custkey_1 == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - PROJECT(columns={'c_custkey_1': c_custkey, 'c_name_1': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) 
+ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index 634dd4f69..e7f6f5fd0 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_spent', total_spent), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice_1, 'max_p_name': max_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_l_extendedprice_1': max_l_extendedprice, 'max_p_name_1': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': 
t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 32ebc4101..355dc9ad3 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice_1, 'max_anything_p_name': max_anything_p_name_1, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice_1': max_anything_anything_l_extendedprice, 'max_anything_p_name_1': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 
'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index a1733f226..2c10e35a7 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_nationkey_1': n_nationkey, 'r_name_1': r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 885853056..4b633dbd0 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,10 +1,9 @@ ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first]) LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - JOIN(condition=t0.o_custkey == t1.c_custkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - PROJECT(columns={'c_custkey_1': c_custkey, 'n_name_1': n_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 3e1783235..c5da9fde3 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -4,17 +4,16 @@ ROOT(columns=[('n', n)], orderings=[]) FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': 
s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index c94a329b1..aa5568ea8 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -4,21 +4,19 @@ ROOT(columns=[('n', n_rows)], orderings=[]) FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price_1, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost_1, 's_suppkey': t0.s_suppkey_1, 'sum_expr_1': 
t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 'ps_partkey_1': ps_partkey, 'ps_supplycost_1': ps_supplycost, 's_suppkey_1': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price_1, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - PROJECT(columns={'global_avg_price_1': global_avg_price, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + 
SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index eb1ec52c0..af102dee8 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,8 +1,8 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orders_in_range)], orderings=[(year_7):asc_first, (month_6):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'month_6': ANYTHING(month), 'n_orders_in_range': COUNT(), 'year_7': ANYTHING(year)}) FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice_1, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) - PROJECT(columns={'avg_o_totalprice_1': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) + 
PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 7aefdb4d0..e7c7cde56 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -3,17 +3,16 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, 
columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 4ef8b131d..ac9583af3 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -4,16 +4,15 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': 
t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 1ee36d030..3f6839aaf 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -4,14 +4,13 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey_1, 'n_regionkey': t0.n_regionkey_1}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey, 'n_regionkey_1': n_regionkey, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': 
o_orderpriority}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 0c9acaa5b..f3b0080b7 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,21 +1,20 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', DEFAULT_TO(n_rows, 0:numeric)), ('n_above_avg_suppliers', DEFAULT_TO(n_rows_1, 0:numeric)), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name_1, 'anything_n_regionkey': t0.anything_n_regionkey_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'min_c_acctbal': t0.min_c_acctbal_1, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - PROJECT(columns={'anything_n_name_1': anything_n_name, 'anything_n_nationkey_1': anything_n_nationkey, 'anything_n_regionkey_1': anything_n_regionkey, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows': n_rows}) - JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) - FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 
'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 
'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) + FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 761246fb9..9f4248442 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -3,18 +3,17 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal_1, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name_1, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - PROJECT(columns={'avg_cust_acctbal_1': avg_cust_acctbal, 'n_name_1': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) - 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index 1cfd16b15..c868fedf4 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,18 +1,17 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': 
t0.n_name_1, 'n_nationkey': t0.n_nationkey}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': 
l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index a88f51fdb..e1c8d129c 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', anything_c_name), ('delta', delta)], orderings=[(delta):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name_1, 'delta': delta}, orderings=[(delta):asc_first]) - PROJECT(columns={'anything_c_name_1': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'delta': delta}, orderings=[(delta):asc_first]) + PROJECT(columns={'anything_c_name': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': t1.s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 1bbb483bf..4ee636ab6 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ 
b/tests/test_plan_refsols/correl_34.txt @@ -5,15 +5,14 @@ ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_name_1': n_name, 's_suppkey_1': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': 
s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=l_linestatus == 'F':string & l_returnflag == 'N':string, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index edd89b822..21bb4c0e6 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -7,13 +7,12 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.l_partkey == t1.ps_partkey & 
t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index b7b448977..1fa05bd28 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - PROJECT(columns={'c_name_1': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) + PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 
'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index a6322fc0c..52700565c 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], orderings=[(total_recent_value):desc_last]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name_1, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - PROJECT(columns={'c_name_1': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) + PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index dedd53aad..b1b75726f 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', cr_bal), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', cg_key)], orderings=[(n_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': 
c_acctbal_1, 'c_custkey': c_custkey_1, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty_1, 'ps_partkey': ps_partkey_1, 'r_name': r_name_1, 's_suppkey': s_suppkey_1}, orderings=[(n_name):asc_first]) - PROJECT(columns={'account_balance_21': account_balance_21, 'c_acctbal_1': c_acctbal, 'c_custkey_1': c_custkey, 'key_54': key_54, 'n_name': n_name, 'ps_availqty_1': ps_availqty, 'ps_partkey_1': ps_partkey, 'r_name_1': r_name, 's_suppkey_1': s_suppkey}) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': cg_key, 'cr_bal': cr_bal, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}, orderings=[(n_name):asc_first]) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 5bbc46de5..d2b29d459 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,18 +1,14 @@ -ROOT(columns=[('event_name', ev_name_1), ('era_name', er_name_1), ('event_year', YEAR(ev_dt)), ('season_name', s_name_1), ('tod', t_name_1)], orderings=[(ev_dt):asc_first]) - LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name_1': er_name, 'ev_dt': ev_dt, 'ev_name_1': ev_name, 's_name_1': s_name, 't_name_1': t_name}, orderings=[(ev_dt):asc_first]) - JOIN(condition=t0.ev_key_1 == t1.ev_key_1, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_name': t0.ev_name_1, 's_name': t0.s_name_1, 't_name': t1.t_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key_1': ev_key, 'ev_name_1': ev_name, 's_name_1': s_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name_1, 'ev_dt': t0.ev_dt_1, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name_1, 's_name': t1.s_name_1}) - PROJECT(columns={'er_name_1': er_name, 'ev_dt_1': ev_dt, 'ev_key': ev_key, 'ev_name_1': ev_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - PROJECT(columns={'ev_key': ev_key, 's_name_1': 
s_name}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - PROJECT(columns={'ev_key_1': ev_key, 't_name_1': t_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) +ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) + LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) 
== t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index d87d0c19e..0c1fec6b1 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('low_square', low_square_1), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square_1):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square_1': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) +ROOT(columns=[('low_square', low_square), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) PROJECT(columns={'low_square': sbDpLow ** 2:numeric, 'sbDpLow': sbDpLow}) SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git a/tests/test_plan_refsols/floor_and_ceil_2.txt b/tests/test_plan_refsols/floor_and_ceil_2.txt index 2a4dcec3a..29b9464c1 100644 --- a/tests/test_plan_refsols/floor_and_ceil_2.txt +++ 
b/tests/test_plan_refsols/floor_and_ceil_2.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost_1)], orderings=[(total_cost_1):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost_1': total_cost}, orderings=[(total_cost):desc_last]) +ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost)], orderings=[(total_cost):desc_last]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': total_cost}, orderings=[(total_cost):desc_last]) PROJECT(columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index 179adc4d0..c9f1900fe 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,13 +1,12 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) - JOIN(condition=t0.l_orderkey_1 == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name_1}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 
'l_orderkey_1': l_orderkey, 'l_shipdate': l_shipdate, 'n_name_1': n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index 6f090bc1b..e06f975c8 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,16 +1,14 @@ ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - JOIN(condition=t0.l_partkey_1 == t1.ps_partkey & t0.l_suppkey_1 == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name_1, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1, 'supplier_region_name': t1.r_name}) - PROJECT(columns={'l_partkey_1': l_partkey, 'l_suppkey_1': l_suppkey, 'n_name_1': n_name, 'o_orderdate': o_orderdate, 'r_name_1': r_name}) - JOIN(condition=t0.o_orderkey_1 == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name_1, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name_1}) - PROJECT(columns={'n_name_1': n_name, 'o_orderdate': o_orderdate, 'o_orderkey_1': o_orderkey, 'r_name_1': r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index e2d259395..d185fd519 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,12 +1,11 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': 
n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': 
COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 6227e214c..2186c653c 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,11 +1,8 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal_1, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal_1, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'n_nationkey': n_nationkey, 'sum_c_acctbal': sum_c_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_c_acctbal_1': avg_c_acctbal, 'c_nationkey': c_nationkey, 'max_c_acctbal_1': max_c_acctbal, 'sum_c_acctbal': sum_c_acctbal}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey}) - PROJECT(columns={'avg_s_acctbal_1': avg_s_acctbal, 'max_s_acctbal_1': max_s_acctbal, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index 21297f633..7706c56b5 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,10 +1,9 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), 
('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey_1 == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name_1, 'total_suppliers': t1.total_suppliers}) - PROJECT(columns={'count_s_suppkey': count_s_suppkey, 'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git 
a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 7ac114d5f..254b7ce5c 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,6 +1,6 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', days_since_first_order)], orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey_1}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey_1': o_orderkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) + PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey': o_orderkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 93d6f9723..9bd706f37 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt 
@@ -1,8 +1,7 @@ -ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int_1), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(l_discount):desc_last]) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(l_discount):desc_last]) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int_1}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int_1': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + LIMIT(limit=Literal(value=2, 
type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 4573cc866..7d3cc9e81 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2_1):desc_last, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2_1': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) +ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2):desc_last, (p_name):asc_first]) + LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) PROJECT(columns={'agg_1': agg_1, 'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 085f35880..720bf6ef4 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('rank', rank)], orderings=[(rank):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name_1, 'rank': rank}, orderings=[(rank):asc_first]) - PROJECT(columns={'n_name_1': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'rank': rank}, orderings=[(rank):asc_first]) + PROJECT(columns={'n_name': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index c21a1b136..9372d9bb7 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', rank)], orderings=[(p_partkey):asc_first]) - LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': 
r_name_1, 'rank': rank}, orderings=[(p_partkey):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'r_name_1': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) + LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': rank}, orderings=[(p_partkey):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index d32e6626b..889ab189c 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name_1, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) - PROJECT(columns={'c_name_1': c_name, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 
'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 4da08ecfe..bb1b832a9 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,16 +1,15 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'n_orders': n_orders, 'p_name': p_name_1, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) - PROJECT(columns={'n_orders': n_orders, 'p_name_1': p_name, 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) - FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name, 's_name': s_name}, 
orderings=[(n_orders):desc_last, (s_name):asc_first]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index d419ed393..5ed26b322 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ 
-1,10 +1,9 @@ -ROOT(columns=[('name', r_name), ('n_cust', n_cust)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cust': t1.n_cust, 'r_name': t0.r_name}) +ROOT(columns=[('name', r_name), ('n_cust', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) - PROJECT(columns={'avg_balance': RELAVG(args=[c_acctbal], partition=[n_regionkey], order=[]), 'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) + PROJECT(columns={'avg_balance': RELAVG(args=[c_acctbal], partition=[n_regionkey], order=[]), 'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': 
c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 93d84c026..03ef1ee24 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', cvp_ab_otp)], orderings=[(r_name):asc_first]) - PROJECT(columns={'cvp_ab_otp': ROUND(agg_0, 3:numeric), 'r_name': r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, expr_1)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'expr_1': t1.expr_1, 'n_regionkey': t0.n_regionkey}) +ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 3:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, expr_1)}) + PROJECT(columns={'c_acctbal': c_acctbal, 'expr_1': o_totalprice / 1000000.0:numeric, 'n_regionkey': n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - PROJECT(columns={'expr_1': o_totalprice / 1000000.0:numeric, 'o_custkey': o_custkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_decode3.txt b/tests/test_plan_refsols/sqlite_udf_decode3.txt index 0a5523694..f213966cc 100644 --- a/tests/test_plan_refsols/sqlite_udf_decode3.txt +++ b/tests/test_plan_refsols/sqlite_udf_decode3.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('key', o_orderkey), ('val', val)], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderkey': o_orderkey, 'val': val}, orderings=[(o_orderkey):asc_first]) - PROJECT(columns={'o_orderkey': o_orderkey, 'val': DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string)}) - FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) 
+ROOT(columns=[('key', o_orderkey), ('val', DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string))], orderings=[(o_orderkey):asc_first]) + LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}, orderings=[(o_orderkey):asc_first]) + FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt index b485de9d7..1a89dd509 100644 --- a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt +++ b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('key', o_orderkey), ('d1', d1), ('d2', d2), ('d3', d3), ('d4', d4)], orderings=[(o_totalprice):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'d1': d1, 'd2': d2, 'd3': d3, 'd4': d4, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):asc_first]) - PROJECT(columns={'d1': FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate), 'd2': FORMAT_DATETIME('%Y:%j':string, o_orderdate), 'd3': INTEGER(FORMAT_DATETIME('%s':string, o_orderdate)), 'd4': INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)), 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('key', o_orderkey), ('d1', FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate)), ('d2', FORMAT_DATETIME('%Y:%j':string, o_orderdate)), ('d3', INTEGER(FORMAT_DATETIME('%s':string, o_orderdate))), ('d4', 
INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)))], orderings=[(o_totalprice):asc_first]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):asc_first]) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_gcat.txt b/tests/test_plan_refsols/sqlite_udf_gcat.txt index a9899705e..9c7bd9d68 100644 --- a/tests/test_plan_refsols/sqlite_udf_gcat.txt +++ b/tests/test_plan_refsols/sqlite_udf_gcat.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('name', r_name), ('c1', c1), ('c2', c2), ('c3', c3)], orderings=[(r_name):asc_first]) - PROJECT(columns={'c1': GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last]), 'c2': GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):desc_first]), 'c3': GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last], cumulative=True), 'r_name': r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('name', r_name), ('c1', GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last])), ('c2', GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):desc_first])), ('c3', GCAT(args=[r_name, '-':string], partition=[], order=[(r_name):asc_last], cumulative=True))], orderings=[(r_name):asc_first]) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/sqlite_udf_nval.txt b/tests/test_plan_refsols/sqlite_udf_nval.txt index db0ba53e1..6435249df 100644 --- a/tests/test_plan_refsols/sqlite_udf_nval.txt +++ b/tests/test_plan_refsols/sqlite_udf_nval.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('rname', r_name), ('nname', n_name), ('v1', v1), ('v2', v2), ('v3', v3), ('v4', v4)], orderings=[(r_name):asc_first, (n_name):asc_first]) - PROJECT(columns={'n_name': n_name, 'r_name': 
r_name, 'v1': NVAL(args=[n_name, 3:numeric], partition=[], order=[(n_name):asc_last]), 'v2': NVAL(args=[n_name, 1:numeric], partition=[n_regionkey], order=[(n_name):asc_last]), 'v3': NVAL(args=[n_name, 2:numeric], partition=[n_regionkey], order=[(n_name):asc_last], frame=(1, None)), 'v4': NVAL(args=[n_name, 5:numeric], partition=[], order=[(n_name):asc_last], cumulative=True)}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('rname', r_name), ('nname', n_name), ('v1', NVAL(args=[n_name, 3:numeric], partition=[], order=[(n_name):asc_last])), ('v2', NVAL(args=[n_name, 1:numeric], partition=[n_regionkey], order=[(n_name):asc_last])), ('v3', NVAL(args=[n_name, 2:numeric], partition=[n_regionkey], order=[(n_name):asc_last], frame=(1, None))), ('v4', NVAL(args=[n_name, 5:numeric], partition=[], order=[(n_name):asc_last], cumulative=True))], orderings=[(r_name):asc_first, (n_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index 54a6ded26..b9b7d165d 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', r_name), ('pct_cust_positive', pct_cust_positive), ('pct_supp_positive', pct_supp_positive)], orderings=[(r_name):asc_first]) - 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'pct_cust_positive': t0.pct_cust_positive, 'pct_supp_positive': t1.pct_supp_positive, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'pct_cust_positive': t1.pct_cust_positive, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) +ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2:numeric)), ('pct_supp_positive', ROUND(percentage_expr_3, 2:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'pct_cust_positive': ROUND(percentage_expr_2, 2:numeric)}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(expr_2)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'expr_2': t1.expr_2, 'n_regionkey': t0.n_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(expr_2)}) + PROJECT(columns={'expr_2': POSITIVE(c_acctbal), 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_2': POSITIVE(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': 
c_acctbal, 'c_nationkey': c_nationkey}) - PROJECT(columns={'n_regionkey': n_regionkey, 'pct_supp_positive': ROUND(percentage_expr_3, 2:numeric)}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(expr_3)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'expr_3': t1.expr_3, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(expr_3)}) + PROJECT(columns={'expr_3': POSITIVE(s_acctbal), 'n_regionkey': n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'expr_3': POSITIVE(s_acctbal), 's_nationkey': s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_relmin.txt b/tests/test_plan_refsols/sqlite_udf_relmin.txt index 9d68f1b7f..a4606bb58 100644 --- a/tests/test_plan_refsols/sqlite_udf_relmin.txt +++ b/tests/test_plan_refsols/sqlite_udf_relmin.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('month', month), ('n_orders', n_rows), ('m1', m1), ('m2', m2), ('m3', m3)], orderings=[(month):asc_first]) - PROJECT(columns={'m1': RELMIN(args=[n_rows], partition=[], order=[]), 'm2': RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], cumulative=True), 'm3': RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], frame=(-1, 1)), 'month': month, 'n_rows': n_rows}) - AGGREGATE(keys={'month': month}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate)}) - FILTER(condition=YEAR(o_orderdate) == 
1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('month', month), ('n_orders', n_rows), ('m1', RELMIN(args=[n_rows], partition=[], order=[])), ('m2', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], cumulative=True)), ('m3', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], frame=(-1, 1)))], orderings=[(month):asc_first]) + AGGREGATE(keys={'month': month}, aggregations={'n_rows': COUNT()}) + PROJECT(columns={'month': MONTH(o_orderdate)}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index f63e075d7..54d45c7b6 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -7,12 +7,10 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey_1 == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows_1, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey_1, 'sum_l_quantity': t0.sum_l_quantity}) - PROJECT(columns={'n_rows_1': n_rows, 'ps_partkey_1': ps_partkey, 'ps_suppkey_1': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - 
JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_rows_1': n_rows, 'sum_l_quantity': sum_l_quantity}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git 
a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index a37b6f7bc..6a203c579 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('supplier_name', s_name_1), ('nation_name', n_name_1), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct_1)], orderings=[(national_qty_pct_1):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name_1': n_name, 'national_qty_pct_1': national_qty_pct, 's_name_1': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) +ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct)], orderings=[(national_qty_pct):desc_last]) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'national_qty_pct': national_qty_pct, 's_name': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) PROJECT(columns={'n_name': n_name, 'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index 916816a00..e95879b55 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt 
+++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', acctbal_delta)], orderings=[(acctbal_delta):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name_1, 's_name': s_name_1}, orderings=[(acctbal_delta):desc_last]) - PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name_1': r_name, 's_name_1': s_name}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name, 's_name': s_name}, orderings=[(acctbal_delta):desc_last]) + PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name': r_name, 's_name': s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 0d29adc03..a380dffc1 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,6 +1,6 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', purchase_country), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name_1, 'ir': ir, 'purchase_country': name_2}, orderings=[(ir):desc_last]) - PROJECT(columns={'co_name_1': 
co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'name_2': name_2}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'purchase_country': purchase_country}, orderings=[(ir):desc_last]) + PROJECT(columns={'co_name': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'purchase_country': name_2}) JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index baa9bd355..79a611d7c 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -8,9 +8,8 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id_1 == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - PROJECT(columns={'de_id_1': de_id, 'pr_release': pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, 
columns={'pr_id': pr_id, 'pr_release': pr_release}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index e46df112f..6f30065aa 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,22 +1,21 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows_1, 'month': t0.month_1, 'n_rows': t1.n_rows, 'year': t0.year_1}) - PROJECT(columns={'ca_dt_1': ca_dt, 'month_1': month, 'n_rows_1': n_rows, 'year_1': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - 
JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': 
t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index e2c60b089..df4147ff6 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,6 +1,6 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand_1, 'pr_name': pr_name_1, 'pr_type': pr_type_1}, orderings=[(ir):desc_last]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand_1': pr_brand, 'pr_name_1': pr_name, 'pr_type_1': pr_type}) + LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 
'pr_type': pr_type}, orderings=[(ir):desc_last]) + PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 3734ddf0e..8eac1f4c8 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -4,21 +4,20 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_4': expr_4, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) 
- AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, 
columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index c00cf0803..25ad10b87 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,16 +1,15 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'n_incidents': n_incidents, 'year': year}) PROJECT(columns={'n_devices': 
DEFAULT_TO(sum_expr_3, 0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_3': expr_3, 'n_rows': n_rows, 'year': YEAR(ca_dt)}) - JOIN(condition=t0.ca_dt_1 == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows_1, 'n_rows': t1.n_rows}) - PROJECT(columns={'ca_dt': ca_dt, 'ca_dt_1': ca_dt, 'n_rows_1': n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.ca_dt == 
DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 4ab824798..f92f003ec 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,15 +1,14 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal_1, 'c_address': c_address_1, 'c_comment': c_comment_1, 'c_custkey': c_custkey, 'c_name': c_name_1, 'c_phone': c_phone_1, 'n_name': n_name_1}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey': c_custkey, 'c_name_1': c_name, 'c_phone_1': c_phone, 'n_name_1': n_name}) - JOIN(condition=t0.c_nationkey_1 == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal_1, 'c_address': t0.c_address_1, 'c_comment': t0.c_comment_1, 'c_custkey': t0.c_custkey_1, 'c_name': t0.c_name_1, 'c_phone': t0.c_phone_1, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - PROJECT(columns={'c_acctbal_1': c_acctbal, 'c_address_1': c_address, 'c_comment_1': c_comment, 'c_custkey_1': c_custkey, 'c_name_1': c_name, 'c_nationkey_1': c_nationkey, 'c_phone_1': c_phone, 'sum_expr_1': sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 
'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) + PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': 
t0.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index 06c5ad1a2..bbad37875 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -6,10 +6,9 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, 
columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name_1, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) - PROJECT(columns={'n_name_1': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 
3baa9fc80..6f4efa2d8 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -9,12 +9,11 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'p_partkey_1': p_partkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), 
columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 99e4bd091..e8f41b9b7 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,19 +1,18 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey_1 == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey_1}) - PROJECT(columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey_1': l_suppkey, 'n_name': n_name, 'n_nationkey_1': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index b4f612231..44a91ea4a 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,6 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': 
t1.sum_c_acctbal_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - PROJECT(columns={'avg_c_acctbal': avg_c_acctbal, 'c_nationkey': c_nationkey, 'count_c_acctbal_1': count_c_acctbal, 'max_c_acctbal_1': max_c_acctbal, 'min_c_acctbal_1': min_c_acctbal, 'n_rows_1': n_rows, 'sum_c_acctbal_1': sum_c_acctbal}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index c85511eca..a367c4443 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 
'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': 
RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index ac1149b97..80af8f609 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId_1, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId_1': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], 
order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) + LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) + PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git 
a/tests/test_plan_refsols/years_months_days_hours_datediff.txt b/tests/test_plan_refsols/years_months_days_hours_datediff.txt index f7f1686d0..6a736c481 100644 --- a/tests/test_plan_refsols/years_months_days_hours_datediff.txt +++ b/tests/test_plan_refsols/years_months_days_hours_datediff.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff_1), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff_1):asc_first]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff_1': years_diff}, orderings=[(years_diff):asc_first]) +ROOT(columns=[('x', sbTxDateTime), ('y1', 
datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff):asc_first]) + LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff': years_diff}, orderings=[(years_diff):asc_first]) PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)}) FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_sql_refsols/datediff_ansi.sql b/tests/test_sql_refsols/datediff_ansi.sql index 
aa7347c30..7b2fcb32f 100644 --- a/tests/test_sql_refsols/datediff_ansi.sql +++ b/tests/test_sql_refsols/datediff_ansi.sql @@ -1,19 +1,19 @@ WITH _t0 AS ( SELECT - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff_1, - sbtxdatetime + sbtxdatetime, + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 ORDER BY - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) + years_diff LIMIT 30 ) SELECT sbtxdatetime AS x, CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, - years_diff_1 AS years_diff, + years_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MONTH) AS months_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), DAY) AS days_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), HOUR) AS hours_diff, @@ -21,4 +21,4 @@ SELECT DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), SECOND) AS seconds_diff FROM _t0 ORDER BY - years_diff_1 + years_diff diff --git a/tests/test_sql_refsols/datediff_sqlite.sql b/tests/test_sql_refsols/datediff_sqlite.sql index 734f72ca9..4db97a7d2 100644 --- a/tests/test_sql_refsols/datediff_sqlite.sql +++ b/tests/test_sql_refsols/datediff_sqlite.sql @@ -1,19 +1,19 @@ WITH _t0 AS ( SELECT - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff_1, - sbtxdatetime + sbtxdatetime, + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 ORDER BY - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) 
AS INTEGER) + years_diff LIMIT 30 ) SELECT sbtxdatetime AS x, '2025-05-02 11:00:00' AS y1, '2023-04-03 13:16:30' AS y, - years_diff_1 AS years_diff, + years_diff, ( CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) ) * 12 + CAST(STRFTIME('%m', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS months_diff, @@ -37,4 +37,4 @@ SELECT ) * 60 + CAST(STRFTIME('%S', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%S', sbtxdatetime) AS INTEGER) AS seconds_diff FROM _t0 ORDER BY - years_diff_1 + years_diff diff --git a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql index bb8ea4609..81e55dbda 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( sbtxtickerid ), _t0 AS ( SELECT - sbticker.sbtickersymbol AS sbtickersymbol_1, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + sbticker.sbtickersymbol, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - COALESCE(_s1.sum_sbtxamount, 0) DESC + total_amount DESC LIMIT 10 ) SELECT - sbtickersymbol_1 AS symbol, + sbtickersymbol AS symbol, COALESCE(n_rows, 0) AS num_transactions, - total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql index bb8ea4609..81e55dbda 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( sbtxtickerid ), _t0 AS ( SELECT - sbticker.sbtickersymbol AS sbtickersymbol_1, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + 
sbticker.sbtickersymbol, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - COALESCE(_s1.sum_sbtxamount, 0) DESC + total_amount DESC LIMIT 10 ) SELECT - sbtickersymbol_1 AS symbol, + sbtickersymbol AS symbol, COALESCE(n_rows, 0) AS num_transactions, - total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql index 7a80c3684..7bdd812ac 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql @@ -10,22 +10,22 @@ WITH _s1 AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, - _s1.n_rows + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 3 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql index 103007c2c..280dd33d8 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql @@ -10,22 +10,22 @@ WITH _s1 AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - COALESCE(_s1.sum_sale_price, 0) AS 
total_revenue_1, - _s1.n_rows + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 3 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 16ac04eee..3134aaf0d 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows_1, + COUNT(*) AS n_rows, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -10,9 +10,9 @@ WITH _s1 AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - _s1.n_rows_1 AS n_rows, + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, _s1.sum_sale_price FROM main.salespersons AS salespersons JOIN _s1 AS _s1 @@ -22,8 +22,8 @@ WITH _s1 AS ( LIMIT 5 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, n_rows AS total_sales, COALESCE(sum_sale_price, 0) AS total_revenue FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index b06f6bb94..4a046fb86 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows_1, + COUNT(*) AS n_rows, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -12,9 +12,9 @@ WITH _s1 
AS ( salesperson_id ), _t0 AS ( SELECT - salespersons.first_name AS first_name_1, - salespersons.last_name AS last_name_1, - _s1.n_rows_1 AS n_rows, + salespersons.first_name, + salespersons.last_name, + _s1.n_rows, _s1.sum_sale_price FROM main.salespersons AS salespersons JOIN _s1 AS _s1 @@ -24,8 +24,8 @@ WITH _s1 AS ( LIMIT 5 ) SELECT - first_name_1 AS first_name, - last_name_1 AS last_name, + first_name, + last_name, n_rows AS total_sales, COALESCE(sum_sale_price, 0) AS total_revenue FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql index 3ef87cc68..fe9f62139 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql @@ -8,22 +8,22 @@ WITH _s1 AS ( car_id ), _t0 AS ( SELECT - cars.make AS make_1, - cars.model AS model_1, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, - _s1.n_rows + cars.make, + cars.model, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.cars AS cars LEFT JOIN _s1 AS _s1 ON _s1.car_id = cars._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 5 ) SELECT - make_1 AS make, - model_1 AS model, + make, + model, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql index 3ef87cc68..fe9f62139 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql @@ -8,22 +8,22 @@ WITH _s1 AS ( car_id ), _t0 AS ( SELECT - cars.make AS make_1, - cars.model AS model_1, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue_1, - _s1.n_rows + cars.make, + cars.model, + _s1.n_rows, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.cars AS cars LEFT 
JOIN _s1 AS _s1 ON _s1.car_id = cars._id ORDER BY - COALESCE(_s1.sum_sale_price, 0) DESC + total_revenue DESC LIMIT 5 ) SELECT - make_1 AS make, - model_1 AS model, + make, + model, COALESCE(n_rows, 0) AS total_sales, - total_revenue_1 AS total_revenue + total_revenue FROM _t0 ORDER BY - total_revenue_1 DESC + total_revenue DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 4e4bdb420..5e675ed08 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -11,20 +11,20 @@ WITH _s1 AS ( receiver_id ), _t0 AS ( SELECT - merchants.name AS name_1, - COALESCE(_s1.sum_amount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + merchants.name, + COALESCE(_s1.sum_amount, 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid ORDER BY - COALESCE(_s1.sum_amount, 0) DESC + total_amount DESC LIMIT 2 ) SELECT - name_1 AS merchant_name, + name AS merchant_name, COALESCE(n_rows, 0) AS total_transactions, - total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index ed3cfe9ea..cd313570f 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -11,20 +11,20 @@ WITH _s1 AS ( receiver_id ), _t0 AS ( SELECT - merchants.name AS name_1, - COALESCE(_s1.sum_amount, 0) AS total_amount_1, - _s1.n_rows + _s1.n_rows, + merchants.name, + COALESCE(_s1.sum_amount, 0) AS total_amount FROM main.merchants AS merchants LEFT JOIN _s1 AS _s1 ON _s1.receiver_id = merchants.mid ORDER BY - COALESCE(_s1.sum_amount, 0) DESC + total_amount DESC LIMIT 2 ) SELECT - name_1 AS merchant_name, + name AS merchant_name, COALESCE(n_rows, 0) AS total_transactions, - 
total_amount_1 AS total_amount + total_amount FROM _t0 ORDER BY - total_amount_1 DESC + total_amount DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 672017439..313c5c222 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( coupon_id ), _t0 AS ( SELECT - coupons.code AS code_1, - COALESCE(_s1.count_txid, 0) AS redemption_count_1, + coupons.code, + COALESCE(_s1.count_txid, 0) AS redemption_count, _s1.sum_amount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid ORDER BY - COALESCE(_s1.count_txid, 0) DESC + redemption_count DESC LIMIT 3 ) SELECT - code_1 AS coupon_code, - redemption_count_1 AS redemption_count, + code AS coupon_code, + redemption_count, COALESCE(sum_amount, 0) AS total_discount FROM _t0 ORDER BY - redemption_count_1 DESC + redemption_count DESC diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 672017439..313c5c222 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -8,20 +8,20 @@ WITH _s1 AS ( coupon_id ), _t0 AS ( SELECT - coupons.code AS code_1, - COALESCE(_s1.count_txid, 0) AS redemption_count_1, + coupons.code, + COALESCE(_s1.count_txid, 0) AS redemption_count, _s1.sum_amount FROM main.coupons AS coupons LEFT JOIN _s1 AS _s1 ON _s1.coupon_id = coupons.cid ORDER BY - COALESCE(_s1.count_txid, 0) DESC + redemption_count DESC LIMIT 3 ) SELECT - code_1 AS coupon_code, - redemption_count_1 AS redemption_count, + code AS coupon_code, + redemption_count, COALESCE(sum_amount, 0) AS total_discount FROM _t0 ORDER BY - redemption_count_1 DESC + redemption_count DESC diff --git a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql 
b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql index abf9e60c6..5d9adaf39 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql @@ -5,11 +5,11 @@ WITH _s2 AS ( FROM events ), _t0 AS ( SELECT - eras.er_name AS er_name_1, - events.ev_name AS ev_name_1, - seasons.s_name AS s_name_1, - times.t_name AS t_name_1, - events.ev_dt + eras.er_name, + events.ev_dt, + events.ev_name, + seasons.s_name, + times.t_name FROM events AS events JOIN eras AS eras ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) @@ -32,11 +32,11 @@ WITH _s2 AS ( LIMIT 6 ) SELECT - ev_name_1 AS event_name, - er_name_1 AS era_name, + ev_name AS event_name, + er_name AS era_name, EXTRACT(YEAR FROM CAST(ev_dt AS DATETIME)) AS event_year, - s_name_1 AS season_name, - t_name_1 AS tod + s_name AS season_name, + t_name AS tod FROM _t0 ORDER BY ev_dt diff --git a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql index 972bff76a..1cbe48ccc 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql @@ -5,11 +5,11 @@ WITH _s2 AS ( FROM events ), _t0 AS ( SELECT - eras.er_name AS er_name_1, - events.ev_name AS ev_name_1, - seasons.s_name AS s_name_1, - times.t_name AS t_name_1, - events.ev_dt + eras.er_name, + events.ev_dt, + events.ev_name, + seasons.s_name, + times.t_name FROM events AS events JOIN eras AS eras ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) @@ -32,11 +32,11 @@ WITH _s2 AS ( LIMIT 6 ) SELECT - ev_name_1 AS event_name, - er_name_1 AS era_name, + ev_name AS event_name, + er_name AS era_name, CAST(STRFTIME('%Y', ev_dt) AS INTEGER) AS event_year, - s_name_1 AS season_name, - t_name_1 AS tod + s_name AS season_name, + t_name AS tod FROM _t0 ORDER BY ev_dt diff --git a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql 
b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql index 2d82d40ff..30ed1ca1b 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql @@ -1,19 +1,19 @@ WITH _t0 AS ( SELECT - CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost_1, ps_availqty, ps_partkey, - ps_suppkey + ps_suppkey, + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost FROM tpch.partsupp ORDER BY - CEIL(ps_supplycost * FLOOR(ps_availqty)) DESC + total_cost DESC LIMIT 10 ) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, FLOOR(ps_availqty) AS complete_parts, - total_cost_1 AS total_cost + total_cost FROM _t0 ORDER BY - total_cost_1 DESC + total_cost DESC diff --git a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql index 251ef1ebf..95f227be8 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql @@ -1,21 +1,8 @@ WITH _t0 AS ( SELECT - CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) + CASE - WHEN CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) < ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) - THEN 1 - ELSE 0 - END AS total_cost_1, ps_availqty, ps_partkey, - ps_suppkey - FROM tpch.partsupp - ORDER BY + ps_suppkey, CAST(ps_supplycost * ( CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END ) AS INTEGER) + CASE @@ -26,14 +13,17 @@ WITH _t0 AS ( ) THEN 1 ELSE 0 - END DESC + END AS total_cost + FROM tpch.partsupp + ORDER BY + total_cost DESC LIMIT 10 ) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 
ELSE 0 END AS complete_parts, - total_cost_1 AS total_cost + total_cost FROM _t0 ORDER BY - total_cost_1 DESC + total_cost DESC diff --git a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql index 68e1f77a1..28cbbc248 100644 --- a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT AVG(customer.c_acctbal) OVER (PARTITION BY nation.n_regionkey) AS avg_balance, customer.c_acctbal, @@ -8,9 +8,9 @@ WITH _t3 AS ( ON customer.c_nationkey = nation.n_nationkey ), _s3 AS ( SELECT - COALESCE(COUNT(*), 0) AS n_cust, + COUNT(*) AS n_rows, n_regionkey - FROM _t3 + FROM _t2 WHERE ABS(avg_balance - c_acctbal) <= avg_balance * 0.1 GROUP BY @@ -18,7 +18,7 @@ WITH _t3 AS ( ) SELECT region.r_name AS name, - _s3.n_cust + COALESCE(_s3.n_rows, 0) AS n_cust FROM tpch.region AS region JOIN _s3 AS _s3 ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql index b4a7156ba..b6fe25cdb 100644 --- a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql @@ -1,3 +1,14 @@ +WITH _t0 AS ( + SELECT + o_orderkey, + o_orderpriority + FROM tpch.orders + WHERE + o_clerk = 'Clerk#000000951' + ORDER BY + o_orderkey + LIMIT 10 +) SELECT o_orderkey AS key, CASE @@ -9,9 +20,6 @@ SELECT THEN 'C' ELSE 'D' END AS val -FROM tpch.orders -WHERE - o_clerk = 'Clerk#000000951' +FROM _t0 ORDER BY o_orderkey -LIMIT 10 diff --git a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql index df8c90691..7d81bf0ec 100644 --- a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql @@ -1,10 +1,19 @@ +WITH _t0 AS ( + SELECT + 
o_orderdate, + o_orderkey, + o_totalprice + FROM tpch.orders + ORDER BY + o_totalprice + LIMIT 5 +) SELECT o_orderkey AS key, STRFTIME('%d/%m/%Y', o_orderdate) AS d1, STRFTIME('%Y:%j', o_orderdate) AS d2, CAST(STRFTIME('%s', o_orderdate) AS INTEGER) AS d3, CAST(STRFTIME('%Y%m%d', o_orderdate, '+39 days', 'start of month') AS INTEGER) AS d4 -FROM tpch.orders +FROM _t0 ORDER BY o_totalprice -LIMIT 5 diff --git a/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql index 4e168aa30..a7246cb29 100644 --- a/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_gcat_sqlite.sql @@ -1,16 +1,8 @@ -WITH _t0 AS ( - SELECT - GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c1, - GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c2, - GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS c3, - r_name - FROM tpch.region -) SELECT r_name AS name, - c1, - c2, - c3 -FROM _t0 + GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c1, + GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS c2, + GROUP_CONCAT(r_name, '-') OVER (ORDER BY r_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS c3 +FROM tpch.region ORDER BY r_name diff --git a/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql index 0d0076c57..9a4baf571 100644 --- a/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_nval_sqlite.sql @@ -1,23 +1,13 @@ -WITH _t0 AS ( - SELECT - NTH_VALUE(nation.n_name, 3) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v1, - NTH_VALUE(nation.n_name, 1) OVER (PARTITION BY nation.n_regionkey 
ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v2, - NTH_VALUE(nation.n_name, 2) OVER (PARTITION BY nation.n_regionkey ORDER BY nation.n_name ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS v3, - NTH_VALUE(nation.n_name, 5) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS v4, - nation.n_name, - region.r_name - FROM tpch.region AS region - JOIN tpch.nation AS nation - ON nation.n_regionkey = region.r_regionkey -) SELECT - r_name AS rname, - n_name AS nname, - v1, - v2, - v3, - v4 -FROM _t0 + region.r_name AS rname, + nation.n_name AS nname, + NTH_VALUE(nation.n_name, 3) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v1, + NTH_VALUE(nation.n_name, 1) OVER (PARTITION BY nation.n_regionkey ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS v2, + NTH_VALUE(nation.n_name, 2) OVER (PARTITION BY nation.n_regionkey ORDER BY nation.n_name ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS v3, + NTH_VALUE(nation.n_name, 5) OVER (ORDER BY nation.n_name ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS v4 +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey ORDER BY - r_name, - n_name + region.r_name, + nation.n_name diff --git a/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql index 4dbef9cea..c4ac974c1 100644 --- a/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_percent_positive_sqlite.sql @@ -5,12 +5,9 @@ WITH _s0 AS ( FROM tpch.nation ), _s3 AS ( SELECT - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN customer.c_acctbal > 0 THEN 1 END) - ) AS REAL) / COUNT(*), - 2 - ) AS pct_cust_positive, + CAST(( + 100.0 * SUM(CASE WHEN customer.c_acctbal > 0 THEN 1 END) + ) AS REAL) / COUNT(*) AS percentage_expr_2, _s0.n_regionkey FROM _s0 AS _s0 JOIN tpch.customer 
AS customer @@ -19,12 +16,9 @@ WITH _s0 AS ( _s0.n_regionkey ), _s7 AS ( SELECT - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN supplier.s_acctbal > 0 THEN 1 END) - ) AS REAL) / COUNT(*), - 2 - ) AS pct_supp_positive, + CAST(( + 100.0 * SUM(CASE WHEN supplier.s_acctbal > 0 THEN 1 END) + ) AS REAL) / COUNT(*) AS percentage_expr_3, _s4.n_regionkey FROM _s0 AS _s4 JOIN tpch.supplier AS supplier @@ -34,8 +28,8 @@ WITH _s0 AS ( ) SELECT region.r_name AS name, - _s3.pct_cust_positive, - _s7.pct_supp_positive + ROUND(_s3.percentage_expr_2, 2) AS pct_cust_positive, + ROUND(_s7.percentage_expr_3, 2) AS pct_supp_positive FROM tpch.region AS region JOIN _s3 AS _s3 ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 52747c6e5..64803c51d 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey AS p_partkey_1, + part.p_partkey, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey_1 = partsupp.ps_partkey + ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index c0f053dfc..e5b221d69 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -9,7 +9,7 @@ WITH _s3 AS ( l_partkey ), _s5 AS ( SELECT - part.p_partkey AS p_partkey_1, + part.p_partkey, _s3.sum_l_quantity FROM tpch.part AS part JOIN _s3 AS _s3 @@ -22,7 +22,7 @@ WITH _s3 AS ( partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 - ON _s5.p_partkey_1 = partsupp.ps_partkey + ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( 0.5 * 
COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) ) From d88cdb51c4ee0a25b5107155c601c7c54803a061 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 03:41:32 -0400 Subject: [PATCH 14/97] Compressing limit into root --- pydough/conversion/column_bubbler.py | 2 +- pydough/conversion/merge_projects.py | 26 +++++++- pydough/relational/relational_nodes/limit.py | 2 +- .../relational_nodes/relational_root.py | 23 +++++-- pydough/sqlglot/sqlglot_relational_visitor.py | 5 ++ .../aggregation_analytics_1.txt | 44 ++++++------- .../aggregation_analytics_2.txt | 32 +++++---- .../aggregation_analytics_3.txt | 32 +++++---- .../avg_order_diff_per_customer.txt | 21 +++--- tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 19 +++--- tests/test_plan_refsols/bad_child_reuse_3.txt | 19 +++--- tests/test_plan_refsols/bad_child_reuse_4.txt | 17 +++-- tests/test_plan_refsols/bad_child_reuse_5.txt | 2 +- tests/test_plan_refsols/common_prefix_aa.txt | 15 ++--- tests/test_plan_refsols/common_prefix_al.txt | 2 +- tests/test_plan_refsols/common_prefix_am.txt | 2 +- tests/test_plan_refsols/common_prefix_an.txt | 2 +- tests/test_plan_refsols/common_prefix_ao.txt | 53 ++++++++------- tests/test_plan_refsols/common_prefix_i.txt | 21 +++--- tests/test_plan_refsols/common_prefix_j.txt | 13 ++-- tests/test_plan_refsols/common_prefix_k.txt | 13 ++-- tests/test_plan_refsols/common_prefix_l.txt | 35 +++++----- tests/test_plan_refsols/common_prefix_m.txt | 35 +++++----- tests/test_plan_refsols/common_prefix_n.txt | 51 +++++++------- tests/test_plan_refsols/common_prefix_o.txt | 55 ++++++++-------- tests/test_plan_refsols/common_prefix_p.txt | 28 ++++---- tests/test_plan_refsols/common_prefix_q.txt | 30 ++++----- tests/test_plan_refsols/common_prefix_r.txt | 36 +++++----- tests/test_plan_refsols/common_prefix_t.txt | 26 ++++---- tests/test_plan_refsols/common_prefix_u.txt | 30 ++++----- tests/test_plan_refsols/common_prefix_v.txt | 15 ++--- 
tests/test_plan_refsols/common_prefix_w.txt | 17 +++-- tests/test_plan_refsols/common_prefix_x.txt | 22 +++---- tests/test_plan_refsols/common_prefix_y.txt | 22 +++---- tests/test_plan_refsols/common_prefix_z.txt | 15 ++--- tests/test_plan_refsols/correl_19.txt | 17 +++-- tests/test_plan_refsols/correl_22.txt | 17 +++-- tests/test_plan_refsols/correl_25.txt | 41 ++++++------ tests/test_plan_refsols/correl_32.txt | 24 ++++--- ...count_at_most_100_suppliers_per_nation.txt | 2 +- .../customer_largest_order_deltas.txt | 30 ++++----- .../customer_most_recent_orders.txt | 14 ++-- tests/test_plan_refsols/datetime_relative.txt | 2 +- .../test_plan_refsols/deep_best_analysis.txt | 66 +++++++++---------- tests/test_plan_refsols/dumb_aggregation.txt | 2 +- .../epoch_culture_events_info.txt | 25 ++++--- ...ping_event_search_other_users_per_user.txt | 25 ++++--- ...ch_overlapping_event_searches_per_user.txt | 29 ++++---- .../epoch_users_most_cold_war_searches.txt | 23 ++++--- tests/test_plan_refsols/exponentiation.txt | 6 +- .../first_order_per_customer.txt | 13 ++-- tests/test_plan_refsols/floor_and_ceil_2.txt | 6 +- tests/test_plan_refsols/function_sampler.txt | 15 ++--- tests/test_plan_refsols/join_topk.txt | 9 ++- .../minutes_seconds_datediff.txt | 7 +- .../multi_partition_access_1.txt | 5 +- .../test_plan_refsols/order_by_expression.txt | 6 +- .../test_plan_refsols/order_quarter_test.txt | 7 +- .../orders_versus_first_orders.txt | 20 +++--- tests/test_plan_refsols/padding_functions.txt | 5 +- tests/test_plan_refsols/part_reduced_size.txt | 13 ++-- .../parts_quantity_increase_95_96.txt | 36 +++++----- .../quantile_function_test_2.txt | 2 +- .../quantile_function_test_3.txt | 2 +- .../quantile_function_test_4.txt | 2 +- .../rank_nations_per_region_by_customers.txt | 16 ++--- ...rank_parts_per_supplier_region_by_size.txt | 22 +++---- .../test_plan_refsols/rank_with_filters_c.txt | 2 +- tests/test_plan_refsols/sign.txt | 5 +- tests/test_plan_refsols/simple_cross_5.txt | 4 +- 
tests/test_plan_refsols/simple_cross_7.txt | 24 ++++--- tests/test_plan_refsols/simple_cross_9.txt | 17 +++-- .../simple_filter_top_five.txt | 7 +- .../simple_scan_top_five.txt | 5 +- tests/test_plan_refsols/simple_topk.txt | 5 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 15 ++--- tests/test_plan_refsols/singular5.txt | 27 ++++---- tests/test_plan_refsols/singular6.txt | 27 ++++---- tests/test_plan_refsols/singular7.txt | 29 ++++---- .../test_plan_refsols/sqlite_udf_decode3.txt | 7 +- .../sqlite_udf_format_datetime.txt | 5 +- .../test_plan_refsols/supplier_best_part.txt | 31 +++++---- .../supplier_pct_national_qty.txt | 32 +++++---- .../test_plan_refsols/suppliers_bal_diffs.txt | 14 ++-- ...ograph_battery_failure_rates_anomalies.txt | 30 ++++----- ...chnograph_country_combination_analysis.txt | 30 ++++----- .../technograph_hot_purchase_window.txt | 17 +++-- .../technograph_most_unreliable_products.txt | 24 ++++--- ...top_5_nations_balance_by_num_suppliers.txt | 11 ++-- .../top_5_nations_by_num_supplierss.txt | 11 ++-- .../top_customers_by_orders.txt | 12 ++-- tests/test_plan_refsols/topk_order_by.txt | 5 +- .../test_plan_refsols/topk_order_by_calc.txt | 5 +- .../topk_replace_order_by.txt | 5 +- .../topk_root_different_order_by.txt | 2 +- tests/test_plan_refsols/tpch_q10.txt | 26 ++++---- tests/test_plan_refsols/tpch_q11.txt | 41 ++++++------ tests/test_plan_refsols/tpch_q13.txt | 17 +++-- tests/test_plan_refsols/tpch_q16.txt | 19 +++--- tests/test_plan_refsols/tpch_q18.txt | 19 +++--- tests/test_plan_refsols/tpch_q2.txt | 27 ++++---- tests/test_plan_refsols/tpch_q20.txt | 37 +++++------ tests/test_plan_refsols/tpch_q21.txt | 48 +++++++------- tests/test_plan_refsols/tpch_q3.txt | 24 ++++--- tests/test_plan_refsols/tpch_q9.txt | 31 +++++---- .../window_sliding_frame_relsize.txt | 10 ++- .../window_sliding_frame_relsum.txt | 10 ++- .../year_month_nation_orders.txt | 25 ++++--- .../years_months_days_hours_datediff.txt | 8 +-- 
tests/test_sql_refsols/datediff_ansi.sql | 20 ++---- tests/test_sql_refsols/datediff_sqlite.sql | 20 ++---- .../defog_broker_adv10_ansi.sql | 2 +- .../defog_broker_adv10_sqlite.sql | 2 +- .../defog_broker_adv1_ansi.sql | 2 +- .../defog_broker_adv1_sqlite.sql | 2 +- .../defog_broker_adv2_ansi.sql | 2 +- .../defog_broker_adv2_sqlite.sql | 2 +- .../defog_broker_adv4_ansi.sql | 2 +- .../defog_broker_adv4_sqlite.sql | 2 +- .../defog_broker_basic3_ansi.sql | 24 +++---- .../defog_broker_basic3_sqlite.sql | 24 +++---- .../defog_broker_gen4_ansi.sql | 2 +- .../defog_broker_gen4_sqlite.sql | 2 +- .../defog_dealership_adv16_ansi.sql | 2 +- .../defog_dealership_adv16_sqlite.sql | 2 +- .../defog_dealership_basic10_ansi.sql | 27 +++----- .../defog_dealership_basic10_sqlite.sql | 27 +++----- .../defog_dealership_basic5_ansi.sql | 27 +++----- .../defog_dealership_basic5_sqlite.sql | 27 +++----- .../defog_dealership_basic6_ansi.sql | 2 +- .../defog_dealership_basic6_sqlite.sql | 2 +- .../defog_dealership_basic7_ansi.sql | 2 +- .../defog_dealership_basic7_sqlite.sql | 2 +- .../defog_dealership_basic8_ansi.sql | 27 +++----- .../defog_dealership_basic8_sqlite.sql | 27 +++----- .../defog_dealership_gen1_ansi.sql | 2 +- .../defog_dealership_gen1_sqlite.sql | 4 +- .../defog_ewallet_adv15_ansi.sql | 2 +- .../defog_ewallet_adv15_sqlite.sql | 2 +- .../defog_ewallet_basic10_ansi.sql | 24 +++---- .../defog_ewallet_basic10_sqlite.sql | 24 +++---- .../defog_ewallet_basic8_ansi.sql | 24 +++---- .../defog_ewallet_basic8_sqlite.sql | 24 +++---- .../defog_ewallet_basic9_ansi.sql | 2 +- .../defog_ewallet_basic9_sqlite.sql | 2 +- .../epoch_culture_events_info_ansi.sql | 58 +++++++--------- .../epoch_culture_events_info_sqlite.sql | 58 +++++++--------- .../floor_and_ceil_2_ansi.sql | 18 ++--- .../floor_and_ceil_2_sqlite.sql | 48 +++++++------- .../sqlite_udf_decode3_sqlite.sql | 16 ++--- .../sqlite_udf_format_datetime_sqlite.sql | 13 +--- ...h_battery_failure_rates_anomalies_ansi.sql | 2 +- 
...battery_failure_rates_anomalies_sqlite.sql | 2 +- ...raph_country_combination_analysis_ansi.sql | 4 +- ...ph_country_combination_analysis_sqlite.sql | 4 +- ...hnograph_most_unreliable_products_ansi.sql | 2 +- ...ograph_most_unreliable_products_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q10_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q10_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 6 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 6 +- tests/test_sql_refsols/tpch_q21_ansi.sql | 46 ++++++------- tests/test_sql_refsols/tpch_q21_sqlite.sql | 50 +++++++------- tests/test_sql_refsols/tpch_q2_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q3_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q3_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q9_ansi.sql | 55 +++++++--------- tests/test_sql_refsols/tpch_q9_sqlite.sql | 55 +++++++--------- 170 files changed, 1324 insertions(+), 1574 deletions(-) diff --git a/pydough/conversion/column_bubbler.py b/pydough/conversion/column_bubbler.py index b09975d6d..d5d0bc131 100644 --- a/pydough/conversion/column_bubbler.py +++ b/pydough/conversion/column_bubbler.py @@ -333,4 +333,4 @@ def bubble_column_names(root: RelationalRoot) -> RelationalRoot: ordering.nulls_first, ) ) - return RelationalRoot(new_input, new_ordered_columns, new_orderings) + return RelationalRoot(new_input, new_ordered_columns, new_orderings, root.limit) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index ca2d3f4d1..cc5eac798 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -229,7 +229,7 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: and any(contains_window(expr) for expr in node.columns.values()) ): # Replace all column references in the root's columns with - # the expressions from the child projection.. + # the expressions from the child projection. 
for idx, (name, expr) in enumerate(node.ordered_columns): new_expr = transpose_expression(expr, child_project.columns) node.columns[name] = new_expr @@ -290,6 +290,30 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: if key_name not in keys_used: new_columns[key_name] = node.input.columns[key_name] return node.input.copy(columns=new_columns) + # Alternatively: if the node is a root and it is on top of a limit, try to + # suck the limit into the root. + if isinstance(node, RelationalRoot) and isinstance(node.input, Limit): + new_orderings: list[ExpressionSortInfo] = [ + ExpressionSortInfo( + transpose_expression(ordering.expr, node.input.columns), + ordering.ascending, + ordering.nulls_first, + ) + for ordering in node.orderings + ] + if node.input.orderings == new_orderings: + # If the orderings are the same, pull in the limit into the root. + # Replace all column references in the root's columns with + # the expressions from the child projection. + for idx, (name, expr) in enumerate(node.ordered_columns): + new_expr = transpose_expression(expr, node.input.columns) + node.columns[name] = new_expr + node.ordered_columns[idx] = (name, new_expr) + node._orderings = new_orderings + node._limit = node.input.limit + # Delete the child projection from the tree, replacing it + # with its input. 
+ node._input = node.input.input return node diff --git a/pydough/relational/relational_nodes/limit.py b/pydough/relational/relational_nodes/limit.py index 9da2f2061..45311aee4 100644 --- a/pydough/relational/relational_nodes/limit.py +++ b/pydough/relational/relational_nodes/limit.py @@ -66,7 +66,7 @@ def to_string(self, compact: bool = False) -> str: orderings: list[str] = [ ordering.to_string(compact) for ordering in self.orderings ] - return f"LIMIT(limit={self.limit}, columns={self.make_column_string(self.columns, compact)}, orderings=[{', '.join(orderings)}])" + return f"LIMIT(limit={self.limit.to_string(compact)}, columns={self.make_column_string(self.columns, compact)}, orderings=[{', '.join(orderings)}])" def accept(self, visitor: "RelationalVisitor") -> None: # type: ignore # noqa return visitor.visit_limit(self) diff --git a/pydough/relational/relational_nodes/relational_root.py b/pydough/relational/relational_nodes/relational_root.py index 03a862552..48d7b4e7e 100644 --- a/pydough/relational/relational_nodes/relational_root.py +++ b/pydough/relational/relational_nodes/relational_root.py @@ -25,6 +25,7 @@ def __init__( input: RelationalNode, ordered_columns: list[tuple[str, RelationalExpression]], orderings: list[ExpressionSortInfo] | None = None, + limit: RelationalExpression | None = None, ) -> None: columns = dict(ordered_columns) assert len(columns) == len(ordered_columns), ( @@ -35,6 +36,7 @@ def __init__( self._orderings: list[ExpressionSortInfo] = ( [] if orderings is None else orderings ) + self._limit: RelationalExpression | None = limit @property def ordered_columns(self) -> list[tuple[str, RelationalExpression]]: @@ -51,6 +53,13 @@ def orderings(self) -> list[ExpressionSortInfo]: """ return self._orderings + @property + def limit(self) -> RelationalExpression | None: + """ + The limit on the number of rows in the final output, if any. 
+ """ + return self._limit + def node_equals(self, other: RelationalNode) -> bool: return ( isinstance(other, RelationalRoot) @@ -67,9 +76,13 @@ def to_string(self, compact: bool = False) -> str: orderings: list[str] = [ ordering.to_string(compact) for ordering in self.orderings ] - return ( - f"ROOT(columns=[{', '.join(columns)}], orderings=[{', '.join(orderings)}])" - ) + kwargs: list[tuple[str, str]] = [ + ("columns", f"[{', '.join(columns)}]"), + ("orderings", f"[{', '.join(orderings)}]"), + ] + if self.limit is not None: + kwargs.append(("limit", self.limit.to_string(compact))) + return f"ROOT({', '.join(f'{k}={v}' for k, v in kwargs)})" def accept(self, visitor: "RelationalVisitor") -> None: # type: ignore # noqa visitor.visit_root(self) @@ -81,4 +94,6 @@ def node_copy( ) -> RelationalNode: assert len(inputs) == 1, "Root node should have exactly one input" assert columns == self.columns, "Root columns should not be modified" - return RelationalRoot(inputs[0], self.ordered_columns, self.orderings) + return RelationalRoot( + inputs[0], self.ordered_columns, self.orderings, self.limit + ) diff --git a/pydough/sqlglot/sqlglot_relational_visitor.py b/pydough/sqlglot/sqlglot_relational_visitor.py index 5db263795..53fb7d288 100644 --- a/pydough/sqlglot/sqlglot_relational_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_visitor.py @@ -565,6 +565,11 @@ def visit_root(self, root: RelationalRoot) -> None: query = self._build_subquery(input_expr, exprs, sort=False) if ordering_exprs: query = query.order_by(*ordering_exprs) + if root.limit is not None: + limit_expr: SQLGlotExpression = self._expr_visitor.relational_to_sqlglot( + root.limit + ) + query = query.limit(limit_expr) self._stack.append(query) def relational_to_sqlglot(self, root: RelationalRoot) -> SQLGlotExpression: diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index 993f46ef8..f83dbaec8 100644 --- 
a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -1,23 +1,21 @@ -ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': 
t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) +ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=8:numeric) + JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) + 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index a3fc5b678..2be73d9d4 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('part_name', p_name), ('revenue_generated', revenue_generated)], orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - LIMIT(limit=Literal(value=4, type=NumericType()), columns={'p_name': p_name, 'revenue_generated': revenue_generated}, orderings=[(revenue_generated):asc_first, (p_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'revenue_generated': ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == 
t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 
'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 8071a16f2..0945b2982 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('part_name', p_name), ('revenue_ratio', revenue_ratio)], orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'p_name': p_name, 'revenue_ratio': revenue_ratio}, orderings=[(revenue_ratio):asc_first, (p_name):asc_first]) - PROJECT(columns={'p_name': p_name, 'revenue_ratio': ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) + PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 
1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index 3f8662435..5a91d29b9 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt 
+++ b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_diff': avg_diff, 'c_name': c_name}, orderings=[(avg_diff):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'avg_diff': AVG(day_diff)}) - PROJECT(columns={'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'avg_diff': AVG(day_diff)}) + PROJECT(columns={'day_diff': DATEDIFF('days':string, PREV(args=[o_orderdate], partition=[o_custkey], order=[(o_orderdate):asc_last]), o_orderdate), 'o_custkey': o_custkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index df12efecf..9a59bd793 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 128fe3cf1..9aa529377 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', 
n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 128fe3cf1..9aa529377 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': 
COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index 0bcaee7d1..510790836 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 
'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index 98c75eec1..8d79ea81a 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ 
b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) + LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_aa.txt b/tests/test_plan_refsols/common_prefix_aa.txt index ce3c2be2b..71a7e20e6 100644 --- a/tests/test_plan_refsols/common_prefix_aa.txt +++ b/tests/test_plan_refsols/common_prefix_aa.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AMERICA':string, 
columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 240fcb590..d5af6cd67 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) FILTER(condition=n_orders > RELAVG(args=[n_orders], 
partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 23c6a811a..f42e5981e 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 6d114b0ce..33522fc5d 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & 
DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=Literal(value=50, type=NumericType()), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 4ac379581..9b504757d 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,27 +1,26 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - LIMIT(limit=Literal(value=35, type=NumericType()), columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 
'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index 
5056ae384..c74e5591b 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(n_rows):desc_last, (n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) +ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index a338a9b8e..5cd83f892 100644 --- a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', 
c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 4c3a0abf6..41de2f7c4 100644 --- a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', c_name), ('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], 
limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 027e4d8e7..7ce4fe3b2 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), 
('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index f91eee7b1..7dda4ae8a 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'avg_s_acctbal': avg_s_acctbal, 'c_name': c_name, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_name': n_name, 'n_rows': n_rows, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': 
t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', 
DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) >= 5:numeric, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 71e91f37d..fc3a3530b 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,26 +1,25 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': 
DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': 
l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': 
o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': 
t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 907f12e2a..1916a60a9 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,28 +1,27 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), 
('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}, orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first]) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & 
YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 
0:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 3d779be70..f3d26328c 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,15 +1,13 @@ 
-ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(ordering_3):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': ordering_3}, orderings=[(ordering_3):asc_first, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'ndistinct_l_partkey': ndistinct_l_partkey, 'ordering_3': DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax == 0:numeric, 
columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)):asc_first, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt 
b/tests/test_plan_refsols/common_prefix_q.txt index e7f6f5fd0..c9471f9aa 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', c_name), ('total_spent', total_spent), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_l_extendedprice': max_l_extendedprice, 'max_p_name': max_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': 
t0.o_orderkey, 'p_name': t1.p_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric)), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) + 
FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 355dc9ad3..50f09aa93 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,19 +1,17 @@ -ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', total_spent)], orderings=[(total_spent):desc_last, (c_name):asc_first]) - 
LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': total_spent}, orderings=[(total_spent):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_anything_l_extendedprice': MAX(anything_anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_o_totalprice': SUM(o_totalprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_anything_l_extendedprice': t1.anything_anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'sum_n_rows': t1.sum_n_rows}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': 
o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_anything_l_extendedprice': ANYTHING(anything_l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t0.anything_l_extendedprice, 'n_rows': t0.n_rows, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) - AGGREGATE(keys={'l_partkey': l_partkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': 
t0.c_name, 'max_anything_anything_l_extendedprice': t1.max_anything_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=sum_sum_n_rows > 0:numeric, columns={'max_anything_anything_l_extendedprice': max_anything_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_anything_l_extendedprice': MAX(anything_anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_o_totalprice': SUM(o_totalprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_anything_l_extendedprice': t1.anything_anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'sum_n_rows': t1.sum_n_rows}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_anything_l_extendedprice': ANYTHING(anything_l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t0.anything_l_extendedprice, 'n_rows': t0.n_rows, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + AGGREGATE(keys={'l_partkey': l_partkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'n_rows': 
COUNT()}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index e350ba091..9095c85fb 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,14 +1,12 @@ -ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': 
c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) +ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index b76d2a813..796e244a7 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', c_name), ('total_qty', total_qty)], orderings=[(total_qty):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'total_qty': total_qty}, orderings=[(total_qty):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'total_qty': DEFAULT_TO(sum_sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': 
SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': 
t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 2c10e35a7..3dc65ce4f 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'r_name': r_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 4b633dbd0..2d7e3a5d0 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first], 
limit=5:numeric) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 9a5054bce..9de4be686 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('name', c_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 
'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 5a92df8a2..938d31871 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('name', c_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'n_orders': DEFAULT_TO(n_rows, 
0:numeric)}) - FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) +ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 75f223d8a..23dd535b3 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'n_name': n_name}, orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_19.txt 
b/tests/test_plan_refsols/correl_19.txt index 528439e8a..a34916e67 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_super_cust': n_super_cust, 'supplier_name': supplier_name}, orderings=[(n_super_cust):desc_last]) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) - FILTER(condition=c_acctbal > s_acctbal, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) +ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last], limit=5:numeric) + AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) + FILTER(condition=c_acctbal > s_acctbal, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_22.txt b/tests/test_plan_refsols/correl_22.txt index 97f8f16d3..88b3d92e9 100644 --- a/tests/test_plan_refsols/correl_22.txt +++ b/tests/test_plan_refsols/correl_22.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_types': n_types, 'p_container': p_container}, orderings=[(n_types):desc_last, (p_container):asc_first]) - AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) - FILTER(condition=avg_p_retailprice > global_avg_price, columns={'p_container': p_container}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'global_avg_price': t0.global_avg_price, 'p_container': t1.p_container}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_retailprice': p_retailprice, 'p_type': p_type}) +ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first], limit=5:numeric) + AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) + FILTER(condition=avg_p_retailprice > global_avg_price, 
columns={'p_container': p_container}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'global_avg_price': t0.global_avg_price, 'p_container': t1.p_container}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/correl_25.txt b/tests/test_plan_refsols/correl_25.txt index 8de29facd..47e41917c 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', anything_r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', anything_n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', n_urgent_semi_domestic_rail_orders)], orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_r_name': anything_r_name, 'anything_r_regionkey': anything_r_regionkey, 'n_urgent_semi_domestic_rail_orders': n_urgent_semi_domestic_rail_orders}, orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first]) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_r_name': ANYTHING(r_name), 'anything_r_regionkey': ANYTHING(r_regionkey), 'n_urgent_semi_domestic_rail_orders': 
NDISTINCT(l_orderkey)}) - FILTER(condition=name_12 != n_name & expr_3 == r_name, columns={'c_custkey': c_custkey, 'c_name': c_name, 'l_orderkey': l_orderkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'expr_3': t1.r_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'name_12': t1.n_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric & o_orderpriority == 
'1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', anything_r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', anything_n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', n_urgent_semi_domestic_rail_orders)], orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_r_name': ANYTHING(r_name), 'anything_r_regionkey': ANYTHING(r_regionkey), 'n_urgent_semi_domestic_rail_orders': NDISTINCT(l_orderkey)}) + FILTER(condition=name_12 != n_name & expr_3 == r_name, columns={'c_custkey': c_custkey, 'c_name': c_name, 'l_orderkey': l_orderkey, 
'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'expr_3': t1.r_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'name_12': t1.n_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 
'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index e1c8d129c..578c3368b 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,13 +1,11 @@ -ROOT(columns=[('customer_name', anything_c_name), ('delta', delta)], orderings=[(delta):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'anything_c_name': anything_c_name, 'delta': delta}, orderings=[(delta):asc_first]) - PROJECT(columns={'anything_c_name': anything_c_name, 'delta': ABS(anything_c_acctbal - median_s_acctbal)}) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) - JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': 
t1.s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'MIDDLE EAST':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_phone': s_phone}) +ROOT(columns=[('customer_name', anything_c_name), ('delta', ABS(anything_c_acctbal - median_s_acctbal))], orderings=[(ABS(anything_c_acctbal - median_s_acctbal)):asc_first], limit=5:numeric) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'anything_c_acctbal': ANYTHING(c_acctbal), 'anything_c_name': ANYTHING(c_name), 'median_s_acctbal': MEDIAN(s_acctbal)}) + JOIN(condition=SLICE(t1.s_phone, -1:numeric, None:unknown, None:unknown) == SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'MIDDLE EAST':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_phone': s_phone}) diff --git a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt index 23a78a9e5..3c7ff73c0 100644 --- a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt +++ b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt @@ -2,5 +2,5 @@ ROOT(columns=[('name', n_name), ('n_top_suppliers', DEFAULT_TO(count_s_suppkey, JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - LIMIT(limit=Literal(value=100, type=NumericType()), 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) + LIMIT(limit=100:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 1fa05bd28..97aba3b36 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('name', c_name), ('largest_diff', largest_diff)], orderings=[(largest_diff):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'largest_diff': largest_diff}, orderings=[(largest_diff):desc_last]) - PROJECT(columns={'c_name': c_name, 'largest_diff': IFF(ABS(min_diff) > max_diff, min_diff, max_diff)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) - PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) - FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(r)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'r': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) +ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, min_diff, max_diff))], orderings=[(IFF(ABS(min_diff) > max_diff, min_diff, max_diff)):desc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) + PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) + FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) + JOIN(condition=t0.o_orderkey 
== t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(r)}) + PROJECT(columns={'l_orderkey': l_orderkey, 'r': l_extendedprice * 1:numeric - l_discount}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 52700565c..5f000a001 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('name', c_name), ('total_recent_value', total_recent_value)], orderings=[(total_recent_value):desc_last]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'c_name': c_name, 'total_recent_value': total_recent_value}, orderings=[(total_recent_value):desc_last]) - PROJECT(columns={'c_name': c_name, 'total_recent_value': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=RANKING(args=[], 
partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/datetime_relative.txt b/tests/test_plan_refsols/datetime_relative.txt index ddd91f4af..be99d1ef3 100644 --- a/tests/test_plan_refsols/datetime_relative.txt +++ b/tests/test_plan_refsols/datetime_relative.txt @@ -1,3 +1,3 @@ ROOT(columns=[('d1', DATETIME(o_orderdate, 'Start of Year':string)), ('d2', DATETIME(o_orderdate, 'START OF MONTHS':string)), ('d3', DATETIME(o_orderdate, '-11 years':string, '+9 months':string, ' - 7 DaYs ':string, '+5 h':string, '-3 minutes':string, '+1 second':string)), ('d4', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of hour':string)), ('d5', DATETIME(Timestamp('2025-07-04 12:58:45'):datetime, 'start of minute':string)), ('d6', DATETIME(Timestamp('2025-07-14 12:58:45'):datetime, '+ 1000000 seconds':string))], 
orderings=[(o_orderdate):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) + LIMIT(limit=10:numeric, columns={'o_orderdate': o_orderdate}, orderings=[(o_custkey):asc_first, (o_orderdate):asc_first]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index b1b75726f..bccadf51a 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,35 +1,33 @@ -ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', cr_bal), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', cg_key)], orderings=[(n_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': cg_key, 'cr_bal': cr_bal, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}, orderings=[(n_name):asc_first]) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cg_key': key_54, 'cr_bal': account_balance_21, 'n_name': n_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_name': r_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 
'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) +ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', c_custkey), ('c_bal', c_acctbal), ('cr_bal', account_balance_21), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', key_54)], orderings=[(n_name):asc_first], limit=10:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'key_54': t1.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': 
t0.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t0.account_balance_21, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'account_balance_21': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': 
r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, 
columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': 
t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': 
t0.r_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index 2604e2675..2a9906b01 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', DEFAULT_TO(r_regionkey, 0:numeric)), 
('a4', IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric)), ('a5', 1:numeric), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=2:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index d2b29d459..ebf3fce9c 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first]) - LIMIT(limit=Literal(value=6, type=NumericType()), columns={'er_name': er_name, 'ev_dt': ev_dt, 'ev_name': ev_name, 's_name': s_name, 't_name': t_name}, orderings=[(ev_dt):asc_first]) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 
'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) - FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) +ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first], limit=6:numeric) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) + SCAN(table=ERAS, 
columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) - SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) + SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index 32d0a2e7c..833fdf684 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users)], orderings=[(n_other_users):desc_last, (anything_user_name):asc_first]) - LIMIT(limit=Literal(value=7, type=NumericType()), columns={'anything_user_name': anything_user_name, 'n_other_users': n_other_users}, orderings=[(n_other_users):desc_last, (anything_user_name):asc_first]) - AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'n_other_users': NDISTINCT(user_id_11)}) - FILTER(condition=name_9 != user_name, columns={'user_id': user_id, 'user_id_11': user_id_11, 'user_name': user_name}) - JOIN(condition=t0.search_user_id == t1.user_id, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) +ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users)], orderings=[(n_other_users):desc_last, (anything_user_name):asc_first], limit=7:numeric) + AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'n_other_users': NDISTINCT(user_id_11)}) + FILTER(condition=name_9 != user_name, columns={'user_id': user_id, 'user_id_11': user_id_11, 'user_name': user_name}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + 
JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index 37d9d53b4..91e424686 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searches)], orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first]) - LIMIT(limit=Literal(value=4, type=NumericType()), columns={'anything_anything_user_name': anything_anything_user_name, 'n_searches': n_searches}, orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first]) - AGGREGATE(keys={'anything_user_id': anything_user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) - FILTER(condition=n_rows > 0:numeric, columns={'anything_user_id': anything_user_id, 'anything_user_name': anything_user_name}) - AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 'anything_user_name': ANYTHING(user_name), 'n_rows': COUNT()}) - 
FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) +ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searches)], orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first], limit=4:numeric) + AGGREGATE(keys={'anything_user_id': anything_user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) + FILTER(condition=n_rows > 0:numeric, columns={'anything_user_id': anything_user_id, 'anything_user_name': anything_user_name}) + AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 
'anything_user_name': ANYTHING(user_name), 'n_rows': COUNT()}) + FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index fcbb954a6..32fa17508 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first]) - LIMIT(limit=Literal(value=3, 
type=NumericType()), columns={'n_cold_war_searches': n_cold_war_searches, 'user_name': user_name}, orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first]) - JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_cold_war_searches': COUNT()}) - AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) - SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) +ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first], limit=3:numeric) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, 
aggregations={'n_cold_war_searches': COUNT()}) + AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) + SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index 0c1fec6b1..ad7f1ae11 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,4 +1,2 @@ -ROOT(columns=[('low_square', low_square), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(low_square):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'low_square': low_square, 'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) - PROJECT(columns={'low_square': sbDpLow ** 2:numeric, 'sbDpLow': sbDpLow}) - SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) +ROOT(columns=[('low_square', sbDpLow ** 2:numeric), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(sbDpLow ** 2:numeric):asc_first], limit=10:numeric) + SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git 
a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index f1b37b331..ef68303cf 100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) - FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, 
(o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/floor_and_ceil_2.txt b/tests/test_plan_refsols/floor_and_ceil_2.txt index 29b9464c1..8a0835a61 100644 --- a/tests/test_plan_refsols/floor_and_ceil_2.txt +++ b/tests/test_plan_refsols/floor_and_ceil_2.txt @@ -1,4 +1,2 @@ -ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', total_cost)], orderings=[(total_cost):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': total_cost}, orderings=[(total_cost):desc_last]) - PROJECT(columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'total_cost': CEIL(ps_supplycost * FLOOR(ps_availqty))}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('supplier_key', ps_suppkey), ('part_key', ps_partkey), ('complete_parts', FLOOR(ps_availqty)), ('total_cost', CEIL(ps_supplycost * FLOOR(ps_availqty)))], orderings=[(CEIL(ps_supplycost * FLOOR(ps_availqty))):desc_last], limit=10:numeric) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/function_sampler.txt b/tests/test_plan_refsols/function_sampler.txt index b3bce5eaf..f6aa4aa45 100644 --- a/tests/test_plan_refsols/function_sampler.txt +++ b/tests/test_plan_refsols/function_sampler.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), 
('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name, 'r_name': r_name}, orderings=[(c_address):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) +ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), ('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == 
'7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first], limit=10:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/join_topk.txt b/tests/test_plan_refsols/join_topk.txt index fc19c4da5..f92640023 100644 --- a/tests/test_plan_refsols/join_topk.txt +++ b/tests/test_plan_refsols/join_topk.txt @@ -1,5 +1,4 @@ -ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):asc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'r_name': r_name}, orderings=[(n_name):asc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):asc_last], limit=10:numeric) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/minutes_seconds_datediff.txt b/tests/test_plan_refsols/minutes_seconds_datediff.txt index 4ae64d60c..ff29298d5 100644 --- a/tests/test_plan_refsols/minutes_seconds_datediff.txt +++ b/tests/test_plan_refsols/minutes_seconds_datediff.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('x', sbTxDateTime), ('y', datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), ('minutes_diff', DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime)), ('seconds_diff', DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime))], orderings=[(sbTxDateTime):desc_last]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime}, orderings=[(sbTxDateTime):desc_last]) - FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) +ROOT(columns=[('x', sbTxDateTime), ('y', datetime.datetime(2023, 4, 3, 13, 16, 30):datetime), ('minutes_diff', DATEDIFF('m':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime)), ('seconds_diff', DATEDIFF('s':string, sbTxDateTime, datetime.datetime(2023, 4, 3, 13, 16, 30):datetime))], orderings=[(sbTxDateTime):desc_last], limit=30:numeric) + FILTER(condition=YEAR(sbTxDateTime) <= 2024:numeric, columns={'sbTxDateTime': sbTxDateTime}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_plan_refsols/multi_partition_access_1.txt b/tests/test_plan_refsols/multi_partition_access_1.txt index d4830999a..77ac57e0a 100644 --- a/tests/test_plan_refsols/multi_partition_access_1.txt +++ b/tests/test_plan_refsols/multi_partition_access_1.txt @@ -1,3 +1,2 @@ 
-ROOT(columns=[('symbol', sbTickerSymbol)], orderings=[(sbTickerSymbol):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbTickerSymbol': sbTickerSymbol}, orderings=[(sbTickerSymbol):asc_first]) - SCAN(table=main.sbTicker, columns={'sbTickerSymbol': sbTickerSymbol}) +ROOT(columns=[('symbol', sbTickerSymbol)], orderings=[(sbTickerSymbol):asc_first], limit=5:numeric) + SCAN(table=main.sbTicker, columns={'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/order_by_expression.txt b/tests/test_plan_refsols/order_by_expression.txt index f9509574f..07a7a916f 100644 --- a/tests/test_plan_refsols/order_by_expression.txt +++ b/tests/test_plan_refsols/order_by_expression.txt @@ -1,4 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(ordering_1):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'ordering_1': ordering_1, 'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(ordering_1):asc_first]) - PROJECT(columns={'ordering_1': LENGTH(r_name), 'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(LENGTH(r_name)):asc_first], limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/order_quarter_test.txt b/tests/test_plan_refsols/order_quarter_test.txt index 8666a4eba..d03ce7a7e 100644 --- a/tests/test_plan_refsols/order_quarter_test.txt +++ b/tests/test_plan_refsols/order_quarter_test.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('order_date', o_orderdate), ('quarter', QUARTER(o_orderdate)), ('quarter_start', DATETIME(o_orderdate, 'start of quarter':string)), ('next_quarter', DATETIME(o_orderdate, '+1 quarter':string)), 
('prev_quarter', DATETIME(o_orderdate, '-1 quarter':string)), ('two_quarters_ahead', DATETIME(o_orderdate, '+2 quarters':string)), ('two_quarters_behind', DATETIME(o_orderdate, '-2 quarters':string)), ('quarters_since_1995', DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate)), ('quarters_until_2000', DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string)), ('same_quarter_prev_year', DATETIME(o_orderdate, '-4 quarters':string)), ('same_quarter_next_year', DATETIME(o_orderdate, '+4 quarters':string))], orderings=[(o_orderdate):asc_first]) - LIMIT(limit=Literal(value=1, type=NumericType()), columns={'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_first]) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) +ROOT(columns=[('order_date', o_orderdate), ('quarter', QUARTER(o_orderdate)), ('quarter_start', DATETIME(o_orderdate, 'start of quarter':string)), ('next_quarter', DATETIME(o_orderdate, '+1 quarter':string)), ('prev_quarter', DATETIME(o_orderdate, '-1 quarter':string)), ('two_quarters_ahead', DATETIME(o_orderdate, '+2 quarters':string)), ('two_quarters_behind', DATETIME(o_orderdate, '-2 quarters':string)), ('quarters_since_1995', DATEDIFF('quarter':string, '1995-01-01':string, o_orderdate)), ('quarters_until_2000', DATEDIFF('quarter':string, o_orderdate, '2000-01-01':string)), ('same_quarter_prev_year', DATETIME(o_orderdate, '-4 quarters':string)), ('same_quarter_next_year', DATETIME(o_orderdate, '+4 quarters':string))], orderings=[(o_orderdate):asc_first], limit=1:numeric) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 254b7ce5c..cf6a02f43 100644 --- 
a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', days_since_first_order)], orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'days_since_first_order': days_since_first_order, 'o_orderkey': o_orderkey}, orderings=[(days_since_first_order):desc_last, (c_name):asc_first]) - PROJECT(columns={'c_name': c_name, 'days_since_first_order': DATEDIFF('days':string, order_date_8, o_orderdate), 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) +ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'VIETNAM':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'VIETNAM':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/padding_functions.txt b/tests/test_plan_refsols/padding_functions.txt index f511fb5a2..587435263 100644 --- a/tests/test_plan_refsols/padding_functions.txt +++ b/tests/test_plan_refsols/padding_functions.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('original_name', sbCustName), ('ref_rpad', RPAD('Cust0001':string, 30:numeric, '*':string)), ('ref_lpad', LPAD('Cust0001':string, 30:numeric, '*':string)), ('right_padded', RPAD(sbCustName, 30:numeric, '*':string)), ('left_padded', 
LPAD(sbCustName, 30:numeric, '#':string)), ('truncated_right', RPAD(sbCustName, 8:numeric, '-':string)), ('truncated_left', LPAD(sbCustName, 8:numeric, '-':string)), ('zero_pad_right', RPAD(sbCustName, 0:numeric, '.':string)), ('zero_pad_left', LPAD(sbCustName, 0:numeric, '.':string)), ('right_padded_space', RPAD(sbCustName, 30:numeric, ' ':string)), ('left_padded_space', LPAD(sbCustName, 30:numeric, ' ':string))], orderings=[(sbCustName):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbCustName': sbCustName}, orderings=[(sbCustName):asc_first]) - SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) +ROOT(columns=[('original_name', sbCustName), ('ref_rpad', RPAD('Cust0001':string, 30:numeric, '*':string)), ('ref_lpad', LPAD('Cust0001':string, 30:numeric, '*':string)), ('right_padded', RPAD(sbCustName, 30:numeric, '*':string)), ('left_padded', LPAD(sbCustName, 30:numeric, '#':string)), ('truncated_right', RPAD(sbCustName, 8:numeric, '-':string)), ('truncated_left', LPAD(sbCustName, 8:numeric, '-':string)), ('zero_pad_right', RPAD(sbCustName, 0:numeric, '.':string)), ('zero_pad_left', LPAD(sbCustName, 0:numeric, '.':string)), ('right_padded_space', RPAD(sbCustName, 30:numeric, ' ':string)), ('left_padded_space', LPAD(sbCustName, 30:numeric, ' ':string))], orderings=[(sbCustName):asc_first], limit=5:numeric) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 9bd706f37..a4d8aee87 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,7 +1,6 @@ -ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, 
'%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'l_discount': l_discount, 'l_receiptdate': l_receiptdate, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(l_discount):desc_last]) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) + LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) + PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) + SCAN(table=tpch.PART, 
columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 7d3cc9e81..99a8d19a6 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,19 +1,17 @@ -ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(ordering_2):desc_last, (p_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'agg_1': agg_1, 'ordering_2': ordering_2, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(ordering_2):desc_last, (p_name):asc_first]) - PROJECT(columns={'agg_1': agg_1, 'ordering_2': DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric), 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 
'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) +ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': 
SUM(l_quantity)}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 4d612371a..a500ccec6 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey 
== t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 4d612371a..a500ccec6 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), 
('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index 0059ac367..c9a18d116 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), 
('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) + LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 720bf6ef4..1e519b6c2 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,9 +1,7 @@ 
-ROOT(columns=[('name', n_name), ('rank', rank)], orderings=[(rank):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'rank': rank}, orderings=[(rank):asc_first]) - PROJECT(columns={'n_name': n_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) +ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt 
b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 9372d9bb7..7fd4a68da 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,12 +1,10 @@ -ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', rank)], orderings=[(p_partkey):asc_first]) - LIMIT(limit=Literal(value=15, type=NumericType()), columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': rank}, orderings=[(p_partkey):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'r_name': r_name, 'rank': RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size, 
'p_type': p_type}) +ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True))], orderings=[(p_partkey):asc_first], limit=15:numeric) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index 1a2c4b75c..52ebabdc7 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,7 +1,7 @@ ROOT(columns=[('pname', p_name), ('psize', size_3)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[p_size], order=[(p_retailprice):desc_first]) == 1:numeric, 
columns={'p_name': p_name, 'size_3': size_3}) JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'p_size': p_size}, orderings=[(p_size):desc_last]) + LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) SCAN(table=tpch.PART, columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/sign.txt b/tests/test_plan_refsols/sign.txt index 7c2d6ac5c..2e7984bf3 100644 --- a/tests/test_plan_refsols/sign.txt +++ b/tests/test_plan_refsols/sign.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('high', sbDpHigh), ('high_neg', -1:numeric * sbDpHigh), ('high_zero', 0:numeric * sbDpHigh), ('sign_high', SIGN(sbDpHigh)), ('sign_high_neg', SIGN(-1:numeric * sbDpHigh)), ('sign_high_zero', SIGN(0:numeric * sbDpHigh))], orderings=[(sbDpHigh):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'sbDpHigh': sbDpHigh}, orderings=[(sbDpHigh):asc_first]) - SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) +ROOT(columns=[('high', sbDpHigh), ('high_neg', -1:numeric * sbDpHigh), ('high_zero', 0:numeric * sbDpHigh), ('sign_high', SIGN(sbDpHigh)), ('sign_high_neg', SIGN(-1:numeric * sbDpHigh)), ('sign_high_zero', SIGN(0:numeric * sbDpHigh))], orderings=[(sbDpHigh):asc_first], limit=5:numeric) + SCAN(table=main.sbDailyPrice, columns={'sbDpHigh': sbDpHigh}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt index 995757451..c3fd447c1 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), 
('best_order_priority_qty', total_qty)], orderings=[(p_size):asc_first]) JOIN(condition=t0.p_size == t1.anything_p_size, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size, 'total_qty': t1.total_qty}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'p_size': p_size}, orderings=[(p_size):asc_first]) + LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) @@ -10,7 +10,7 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'p_size': p_size}, orderings=[(p_size):asc_first]) + LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_cross_7.txt b/tests/test_plan_refsols/simple_cross_7.txt index 512241e07..e88f1ca6d 100644 --- a/tests/test_plan_refsols/simple_cross_7.txt +++ b/tests/test_plan_refsols/simple_cross_7.txt 
@@ -1,13 +1,11 @@ -ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', n_other_orders)], orderings=[(n_other_orders):desc_last, (o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_other_orders': n_other_orders, 'o_orderkey': o_orderkey}, orderings=[(n_other_orders):desc_last, (o_orderkey):asc_first]) - PROJECT(columns={'n_other_orders': DEFAULT_TO(n_rows, 0:numeric), 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=customer_key_3 == o_custkey & order_date_5 == o_orderdate & key_4 > o_orderkey, columns={'o_orderkey': o_orderkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_key_3': t1.o_custkey, 'key_4': t1.o_orderkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_5': t1.o_orderdate}) - FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) +ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (o_orderkey):asc_first], 
limit=5:numeric) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=customer_key_3 == o_custkey & order_date_5 == o_orderdate & key_4 > o_orderkey, columns={'o_orderkey': o_orderkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_key_3': t1.o_custkey, 'key_4': t1.o_orderkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_5': t1.o_orderdate}) + FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) diff --git a/tests/test_plan_refsols/simple_cross_9.txt b/tests/test_plan_refsols/simple_cross_9.txt index 95f9968bd..805800c40 100644 --- a/tests/test_plan_refsols/simple_cross_9.txt +++ b/tests/test_plan_refsols/simple_cross_9.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('n1', n_name), ('n2', name_9)], orderings=[(n_name):asc_first, (name_9):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'name_9': name_9}, orderings=[(n_name):asc_first, (name_9):asc_first]) - FILTER(condition=n_name != name_9, columns={'n_name': n_name, 'name_9': name_9}) - JOIN(condition=t0.r_regionkey == 
t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'name_9': t1.n_name}) - JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) +ROOT(columns=[('n1', n_name), ('n2', name_9)], orderings=[(n_name):asc_first, (name_9):asc_first], limit=10:numeric) + FILTER(condition=n_name != name_9, columns={'n_name': n_name, 'name_9': name_9}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'name_9': t1.n_name}) + JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_filter_top_five.txt b/tests/test_plan_refsols/simple_filter_top_five.txt index f9546284c..393975351 100644 --- a/tests/test_plan_refsols/simple_filter_top_five.txt +++ b/tests/test_plan_refsols/simple_filter_top_five.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), 
columns={'o_orderkey': o_orderkey}, orderings=[(o_orderkey):desc_last]) - FILTER(condition=o_totalprice < 1000.0:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):desc_last], limit=5:numeric) + FILTER(condition=o_totalprice < 1000.0:numeric, columns={'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/simple_scan_top_five.txt b/tests/test_plan_refsols/simple_scan_top_five.txt index 55511d884..796a57a65 100644 --- a/tests/test_plan_refsols/simple_scan_top_five.txt +++ b/tests/test_plan_refsols/simple_scan_top_five.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'o_orderkey': o_orderkey}, orderings=[(o_orderkey):asc_first]) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) +ROOT(columns=[('key', o_orderkey)], orderings=[(o_orderkey):asc_first], limit=5:numeric) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_topk.txt b/tests/test_plan_refsols/simple_topk.txt index e7159d251..361fe25b0 100644 --- a/tests/test_plan_refsols/simple_topk.txt +++ b/tests/test_plan_refsols/simple_topk.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last]) - LIMIT(limit=Literal(value=2, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last], limit=2:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': 
r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index 646e5c0b3..6d1870895 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) + LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index 889ab189c..77ed0b3c7 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'o_orderdate': o_orderdate}, orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], 
order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) +ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular5.txt b/tests/test_plan_refsols/singular5.txt index 8c1a46891..20517bc26 100644 --- a/tests/test_plan_refsols/singular5.txt +++ b/tests/test_plan_refsols/singular5.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}, orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first]) - FILTER(condition=sum_n_rows > 0:numeric, 
columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) - AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) +ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first], limit=5:numeric) + FILTER(condition=sum_n_rows > 0:numeric, 
columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) + AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) + FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index 48fc18c62..c118e82c9 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('name', c_name), 
('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_name': c_name, 'l_receiptdate': l_receiptdate, 'n_name': n_name}, orderings=[(l_receiptdate):asc_first, (c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) - FILTER(condition=c_nationkey == 4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) - FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - 
SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) + FILTER(condition=c_nationkey == 4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) + FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': 
l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index bb1b832a9..f03df0318 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_orders': n_orders, 'p_name': p_name, 's_name': s_name}, orderings=[(n_orders):desc_last, (s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) - FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) - 
SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, 
aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_decode3.txt b/tests/test_plan_refsols/sqlite_udf_decode3.txt index f213966cc..1d28d62e6 100644 --- a/tests/test_plan_refsols/sqlite_udf_decode3.txt +++ b/tests/test_plan_refsols/sqlite_udf_decode3.txt @@ -1,4 +1,3 @@ -ROOT(columns=[('key', o_orderkey), ('val', DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string))], orderings=[(o_orderkey):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}, orderings=[(o_orderkey):asc_first]) - FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('key', o_orderkey), ('val', DECODE3(INTEGER(SLICE(o_orderpriority, None:unknown, 1:numeric, None:unknown)), 1:numeric, 'A':string, 2:numeric, 'B':string, 3:numeric, 'C':string, 'D':string))], orderings=[(o_orderkey):asc_first], limit=10:numeric) + FILTER(condition=o_clerk == 'Clerk#000000951':string, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt index 1a89dd509..fb580b235 100644 --- a/tests/test_plan_refsols/sqlite_udf_format_datetime.txt +++ b/tests/test_plan_refsols/sqlite_udf_format_datetime.txt @@ -1,3 +1,2 @@ 
-ROOT(columns=[('key', o_orderkey), ('d1', FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate)), ('d2', FORMAT_DATETIME('%Y:%j':string, o_orderdate)), ('d3', INTEGER(FORMAT_DATETIME('%s':string, o_orderdate))), ('d4', INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)))], orderings=[(o_totalprice):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):asc_first]) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) +ROOT(columns=[('key', o_orderkey), ('d1', FORMAT_DATETIME('%d/%m/%Y':string, o_orderdate)), ('d2', FORMAT_DATETIME('%Y:%j':string, o_orderdate)), ('d3', INTEGER(FORMAT_DATETIME('%s':string, o_orderdate))), ('d4', INTEGER(FORMAT_DATETIME_VARIADIC('%Y%m%d':string, o_orderdate, '+39 days':string, 'start of month':string)))], orderings=[(o_totalprice):asc_first], limit=5:numeric) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 54d45c7b6..5dbeb3b2f 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,16 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', quantity), ('n_shipments', n_rows)], orderings=[(quantity):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=3, type=NumericType()), columns={'n_rows': n_rows, 'p_name': p_name, 'quantity': quantity, 's_name': s_name}, orderings=[(quantity):desc_last, (s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 'quantity': t1.quantity, 's_name': t0.s_name}) - 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', quantity), ('n_shipments', n_rows)], orderings=[(quantity):desc_last, 
(s_name):asc_first], limit=3:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 'quantity': t1.quantity, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) + PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + 
SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 6a203c579..9c00e03aa 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,17 +1,15 @@ -ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', national_qty_pct)], orderings=[(national_qty_pct):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'national_qty_pct': national_qty_pct, 's_name': s_name, 'sum_l_quantity': sum_l_quantity}, orderings=[(national_qty_pct):desc_last]) - PROJECT(columns={'n_name': n_name, 'national_qty_pct': 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]), 's_name': s_name, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'tomato':string) & STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) +ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'tomato':string) & STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index e95879b55..998e0dde9 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt +++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,8 +1,6 @@ -ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', acctbal_delta)], orderings=[(acctbal_delta):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'acctbal_delta': acctbal_delta, 'r_name': r_name, 's_name': s_name}, 
orderings=[(acctbal_delta):desc_last]) - PROJECT(columns={'acctbal_delta': s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]), 'r_name': r_name, 's_name': s_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey}) +ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]))], orderings=[(s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last])):desc_last], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 
d1fba65bb..995700439 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ir)], orderings=[(ir):desc_last, (pr_name):asc_first, (co_name):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'pr_name': pr_name}, orderings=[(ir):desc_last, (pr_name):asc_first, (co_name):asc_first]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_name': pr_name}) - AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'co_name': co_name, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_name': pr_name}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': 
in_error_id}) - FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) - SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) +ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + PROJECT(columns={'co_name': co_name, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_name': pr_name}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) + FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) + SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) diff --git 
a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index a380dffc1..431996dec 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,16 +1,14 @@ -ROOT(columns=[('factory_country', co_name), ('purchase_country', purchase_country), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'co_name': co_name, 'ir': ir, 'purchase_country': purchase_country}, orderings=[(ir):desc_last]) - PROJECT(columns={'co_name': co_name, 'ir': ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric), 'purchase_country': name_2}) - JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, 
columns={'co_id': co_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) + JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 
'de_purchase_country_id': de_purchase_country_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_hot_purchase_window.txt b/tests/test_plan_refsols/technograph_hot_purchase_window.txt index 9d2538e42..52172962b 100644 --- a/tests/test_plan_refsols/technograph_hot_purchase_window.txt +++ b/tests/test_plan_refsols/technograph_hot_purchase_window.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('start_of_period', start_of_period), ('n_purchases', n_purchases)], orderings=[(n_purchases):desc_last, (start_of_period):asc_first]) - LIMIT(limit=Literal(value=1, type=NumericType()), columns={'n_purchases': n_purchases, 'start_of_period': start_of_period}, orderings=[(n_purchases):desc_last, (start_of_period):asc_first]) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_purchases': COUNT(), 'start_of_period': ANYTHING(ca_dt)}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - FILTER(condition=calendar_day_1 < DATETIME(ca_dt, '+5 days':string) & calendar_day_1 >= ca_dt, columns={'ca_dt': ca_dt, 'calendar_day_1': calendar_day_1}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) +ROOT(columns=[('start_of_period', start_of_period), ('n_purchases', n_purchases)], orderings=[(n_purchases):desc_last, (start_of_period):asc_first], limit=1:numeric) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_purchases': COUNT(), 'start_of_period': ANYTHING(ca_dt)}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + 
FILTER(condition=calendar_day_1 < DATETIME(ca_dt, '+5 days':string) & calendar_day_1 >= ca_dt, columns={'ca_dt': ca_dt, 'calendar_day_1': calendar_day_1}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index df4147ff6..46de1e87a 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,13 +1,11 @@ -ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ir)], orderings=[(ir):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'ir': ir, 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}, orderings=[(ir):desc_last]) - PROJECT(columns={'ir': ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric), 'pr_brand': pr_brand, 'pr_name': pr_name, 'pr_type': pr_type}) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == 
t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) +ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) + PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': 
in_device_id}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index 404152d0e..b24f78d73 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'ordering_0': ordering_0, 'sum_s_acctbal': sum_s_acctbal}, orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) +ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt index 9efe788ce..7d8c385c4 100644 --- a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt +++ 
b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ordering_0': ordering_0}, orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last], limit=5:numeric) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_customers_by_orders.txt b/tests/test_plan_refsols/top_customers_by_orders.txt index a9e71b355..f2e888a70 100644 --- a/tests/test_plan_refsols/top_customers_by_orders.txt +++ b/tests/test_plan_refsols/top_customers_by_orders.txt @@ -1,7 
+1,5 @@ -ROOT(columns=[('customer_key', c_custkey), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(n_orders):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'c_custkey': c_custkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('customer_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_custkey):asc_first], limit=5:numeric) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/topk_order_by.txt b/tests/test_plan_refsols/topk_order_by.txt index b0a7cf44a..140c40392 100644 --- a/tests/test_plan_refsols/topk_order_by.txt +++ b/tests/test_plan_refsols/topk_order_by.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):asc_last], 
limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/topk_order_by_calc.txt b/tests/test_plan_refsols/topk_order_by_calc.txt index 58dcdcb9f..23736fba9 100644 --- a/tests/test_plan_refsols/topk_order_by_calc.txt +++ b/tests/test_plan_refsols/topk_order_by_calc.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('region_name', r_name), ('name_length', LENGTH(r_name))], orderings=[(r_name):asc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_name': r_name}, orderings=[(r_name):asc_last]) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) +ROOT(columns=[('region_name', r_name), ('name_length', LENGTH(r_name))], orderings=[(r_name):asc_last], limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/topk_replace_order_by.txt b/tests/test_plan_refsols/topk_replace_order_by.txt index b677480ad..093f06879 100644 --- a/tests/test_plan_refsols/topk_replace_order_by.txt +++ b/tests/test_plan_refsols/topk_replace_order_by.txt @@ -1,3 +1,2 @@ -ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):desc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):desc_first]) - SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):desc_first], limit=10:numeric) + SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/topk_root_different_order_by.txt b/tests/test_plan_refsols/topk_root_different_order_by.txt index ec47fee47..3faee21c0 100644 --- a/tests/test_plan_refsols/topk_root_different_order_by.txt +++ 
b/tests/test_plan_refsols/topk_root_different_order_by.txt @@ -1,3 +1,3 @@ ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[(r_name):desc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_first]) + LIMIT(limit=10:numeric, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}, orderings=[(r_name):asc_first]) SCAN(table=tpch.REGION, columns={'r_comment': r_comment, 'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index f92f003ec..cc56c28d9 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,14 +1,12 @@ -ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', REVENUE), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - LIMIT(limit=Literal(value=20, type=NumericType()), columns={'REVENUE': REVENUE, 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}, orderings=[(REVENUE):desc_last, (c_custkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_phone': c_phone, 'n_name': n_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (c_custkey):asc_first], limit=20:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': 
t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 207c69bc1..1c7b826c2 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), 
columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}, orderings=[(VALUE):desc_last]) - FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) - PROJECT(columns={'metric': ps_supplycost * ps_availqty}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last], limit=10:numeric) + FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) + AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) + PROJECT(columns={'metric': ps_supplycost * ps_availqty}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) + PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 
'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index 04c2c6768..ee9fe24e8 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'CUSTDIST': CUSTDIST, 'num_non_special_orders': num_non_special_orders}, orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last]) - AGGREGATE(keys={'num_non_special_orders': num_non_special_orders}, aggregations={'CUSTDIST': COUNT()}) - PROJECT(columns={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) +ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) + AGGREGATE(keys={'num_non_special_orders': num_non_special_orders}, aggregations={'CUSTDIST': COUNT()}) + PROJECT(columns={'num_non_special_orders': 
DEFAULT_TO(n_rows, 0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q16.txt b/tests/test_plan_refsols/tpch_q16.txt index fa9f4ec4d..07c0cfd97 100644 --- a/tests/test_plan_refsols/tpch_q16.txt +++ b/tests/test_plan_refsols/tpch_q16.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('P_BRAND', p_brand), ('P_TYPE', p_type), ('P_SIZE', p_size), ('SUPPLIER_COUNT', SUPPLIER_COUNT)], orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'SUPPLIER_COUNT': SUPPLIER_COUNT, 'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first]) - AGGREGATE(keys={'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, aggregations={'SUPPLIER_COUNT': NDISTINCT(ps_suppkey)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=NOT(LIKE(s_comment, '%Customer%Complaints%':string)), columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_comment': s_comment, 's_suppkey': s_suppkey}) - FILTER(condition=p_brand != 'BRAND#45':string & ISIN(p_size, 
[49, 14, 23, 45, 19, 3, 36, 9]:array[unknown]) & NOT(STARTSWITH(p_type, 'MEDIUM POLISHED%':string)), columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) +ROOT(columns=[('P_BRAND', p_brand), ('P_TYPE', p_type), ('P_SIZE', p_size), ('SUPPLIER_COUNT', SUPPLIER_COUNT)], orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first], limit=10:numeric) + AGGREGATE(keys={'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, aggregations={'SUPPLIER_COUNT': NDISTINCT(ps_suppkey)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=NOT(LIKE(s_comment, '%Customer%Complaints%':string)), columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_comment': s_comment, 's_suppkey': s_suppkey}) + FILTER(condition=p_brand != 'BRAND#45':string & ISIN(p_size, [49, 14, 23, 45, 19, 3, 36, 9]:array[unknown]) & NOT(STARTSWITH(p_type, 'MEDIUM POLISHED%':string)), columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 8de1daa53..ff041d5aa 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), 
('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}, orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first]) - FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) +ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], 
orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) + FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index bbad37875..135018c23 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first]) - LIMIT(limit=Literal(value=10, 
type=NumericType()), columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}, orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first]) - FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) - FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 
's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) +ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) + FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 6f4efa2d8..fc41ee839 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,19 +1,18 @@ -ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'s_address': s_address, 's_name': s_name}, orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) +ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + 
FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 12f513598..707fdeb6b 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,27 +1,25 @@ -ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', NUMWAIT)], orderings=[(NUMWAIT):desc_last, (s_name):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'NUMWAIT': NUMWAIT, 's_name': s_name}, orderings=[(NUMWAIT):desc_last, 
(s_name):asc_first]) - PROJECT(columns={'NUMWAIT': DEFAULT_TO(n_rows, 0:numeric), 's_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.anything_l_linenumber == t1.l_linenumber & t0.anything_l_orderkey == t1.l_orderkey & t0.anything_o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) - FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_linenumber': anything_l_linenumber, 'anything_l_orderkey': anything_l_orderkey, 'anything_l_suppkey': anything_l_suppkey, 'anything_o_orderkey': anything_o_orderkey}) - AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_linenumber': ANYTHING(l_linenumber), 'anything_l_orderkey': ANYTHING(l_orderkey), 'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderkey': ANYTHING(o_orderkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) - FILTER(condition=supplier_key_19 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 
'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus, 'supplier_key_19': t1.l_suppkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) - FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - FILTER(condition=supplier_key_36 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'supplier_key_36': t1.l_suppkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) +ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 
's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.anything_l_linenumber == t1.l_linenumber & t0.anything_l_orderkey == t1.l_orderkey & t0.anything_o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_linenumber': anything_l_linenumber, 'anything_l_orderkey': anything_l_orderkey, 'anything_l_suppkey': anything_l_suppkey, 'anything_o_orderkey': anything_o_orderkey}) + AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_linenumber': ANYTHING(l_linenumber), 'anything_l_orderkey': ANYTHING(l_orderkey), 'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderkey': ANYTHING(o_orderkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) + FILTER(condition=supplier_key_19 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus, 'supplier_key_19': t1.l_suppkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': 
l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - FILTER(condition=l_receiptdate > l_commitdate, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + FILTER(condition=supplier_key_36 != l_suppkey, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'supplier_key_36': t1.l_suppkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) + FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + FILTER(condition=l_receiptdate > l_commitdate, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) diff --git 
a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index 61ed28ec5..f6c53f4e2 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -1,13 +1,11 @@ -ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', REVENUE), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(REVENUE):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'REVENUE': REVENUE, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, orderings=[(REVENUE):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first]) - PROJECT(columns={'REVENUE': DEFAULT_TO(sum_expr_1, 0:numeric), 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) - FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - FILTER(condition=c_mktsegment == 'BUILDING':string, 
columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) +ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) + PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 
'c_mktsegment': c_mktsegment}) + FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index 8abc81dc2..634b9ee27 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,16 +1,15 @@ -ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last]) - LIMIT(limit=Literal(value=10, type=NumericType()), columns={'n_name': n_name, 'o_year': o_year, 'sum_value': sum_value}, orderings=[(n_name):asc_first, (o_year):desc_last]) - AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) +ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last], limit=10:numeric) + AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) + PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey 
== t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index a367c4443..ddf6fd292 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ 
b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,6 +1,4 @@ -ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w2': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0)), 'w3': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w6': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1)), 'w7': RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)), 'w8': RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) +ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w2', RELSIZE(args=[], partition=[sbTxCustId], 
order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w3', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w6', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w7', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))), ('w8', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) + SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index 80af8f609..5b0a9e60f 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,6 +1,4 @@ -ROOT(columns=[('transaction_id', sbTxId), ('w1', w1), ('w2', w2), ('w3', w3), ('w4', w4), ('w5', w5), ('w6', w6), ('w7', w7), ('w8', w8)], orderings=[(sbTxDateTime):asc_first]) - LIMIT(limit=Literal(value=8, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6, 'w7': w7, 'w8': w8}, orderings=[(sbTxDateTime):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'w1': RELSUM(args=[sbTxShares], partition=[], 
order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w2': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4)), 'w3': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w4': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None)), 'w5': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w6': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1)), 'w7': RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)), 'w8': RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) +ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w2', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w3', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w6', 
RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w7', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))), ('w8', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) + SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index 99d446335..2628b6b63 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last]) - LIMIT(limit=Literal(value=5, type=NumericType()), columns={'n_name': n_name, 'n_orders': n_orders, 'order_month': order_month, 'order_year': order_year}, orderings=[(n_orders):desc_last]) - AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) - FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last], limit=5:numeric) + AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) + PROJECT(columns={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/years_months_days_hours_datediff.txt b/tests/test_plan_refsols/years_months_days_hours_datediff.txt index 6a736c481..ca2da3124 100644 --- a/tests/test_plan_refsols/years_months_days_hours_datediff.txt +++ b/tests/test_plan_refsols/years_months_days_hours_datediff.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', years_diff), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, sbTxDateTime, 
datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(years_diff):asc_first]) - LIMIT(limit=Literal(value=30, type=NumericType()), columns={'sbTxDateTime': sbTxDateTime, 'years_diff': years_diff}, orderings=[(years_diff):asc_first]) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'years_diff': DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)}) - FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) +ROOT(columns=[('x', sbTxDateTime), ('y1', datetime.datetime(2025, 5, 2, 11, 0):datetime), ('years_diff', DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_years_diff', DATEDIFF('YEARS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_y_diff', DATEDIFF('Y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('y_diff', DATEDIFF('y':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('months_diff', DATEDIFF('months':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_months_diff', DATEDIFF('MONTHS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('mm_diff', DATEDIFF('mm':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('days_diff', DATEDIFF('days':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_days_diff', DATEDIFF('DAYS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_d_diff', DATEDIFF('D':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('d_diff', DATEDIFF('d':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('hours_diff', DATEDIFF('hours':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_hours_diff', DATEDIFF('HOURS':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)), ('c_h_diff', DATEDIFF('H':string, 
sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime))], orderings=[(DATEDIFF('years':string, sbTxDateTime, datetime.datetime(2025, 5, 2, 11, 0):datetime)):asc_first], limit=30:numeric) + FILTER(condition=YEAR(sbTxDateTime) < 2025:numeric, columns={'sbTxDateTime': sbTxDateTime}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_sql_refsols/datediff_ansi.sql b/tests/test_sql_refsols/datediff_ansi.sql index 7b2fcb32f..1fdbab326 100644 --- a/tests/test_sql_refsols/datediff_ansi.sql +++ b/tests/test_sql_refsols/datediff_ansi.sql @@ -1,24 +1,16 @@ -WITH _t0 AS ( - SELECT - sbtxdatetime, - DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff - FROM main.sbtransaction - WHERE - EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 - ORDER BY - years_diff - LIMIT 30 -) SELECT sbtxdatetime AS x, CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, - years_diff, + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) AS years_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MONTH) AS months_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), DAY) AS days_diff, DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), HOUR) AS hours_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), MINUTE) AS minutes_diff, DATEDIFF(CAST('2023-04-03 13:16:30' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), SECOND) AS seconds_diff -FROM _t0 +FROM main.sbtransaction +WHERE + EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) < 2025 ORDER BY - years_diff + DATEDIFF(CAST('2025-05-02 11:00:00' AS TIMESTAMP), CAST(sbtxdatetime AS DATETIME), YEAR) +LIMIT 30 diff --git a/tests/test_sql_refsols/datediff_sqlite.sql b/tests/test_sql_refsols/datediff_sqlite.sql index 4db97a7d2..967f26673 
100644 --- a/tests/test_sql_refsols/datediff_sqlite.sql +++ b/tests/test_sql_refsols/datediff_sqlite.sql @@ -1,19 +1,8 @@ -WITH _t0 AS ( - SELECT - sbtxdatetime, - CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff - FROM main.sbtransaction - WHERE - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 - ORDER BY - years_diff - LIMIT 30 -) SELECT sbtxdatetime AS x, '2025-05-02 11:00:00' AS y1, '2023-04-03 13:16:30' AS y, - years_diff, + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) AS years_diff, ( CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) ) * 12 + CAST(STRFTIME('%m', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%m', sbtxdatetime) AS INTEGER) AS months_diff, @@ -35,6 +24,9 @@ SELECT ) AS INTEGER) * 24 + CAST(STRFTIME('%H', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%H', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%M', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%M', sbtxdatetime) AS INTEGER) ) * 60 + CAST(STRFTIME('%S', '2023-04-03 13:16:30') AS INTEGER) - CAST(STRFTIME('%S', sbtxdatetime) AS INTEGER) AS seconds_diff -FROM _t0 +FROM main.sbtransaction +WHERE + CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) < 2025 ORDER BY - years_diff + CAST(STRFTIME('%Y', '2025-05-02 11:00:00') AS INTEGER) - CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) +LIMIT 30 diff --git a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql index 9ff77a9fc..4662c008b 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_ansi.sql @@ -18,5 +18,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s3 AS _s3 ON _s3.sbcustid = sbcustomer.sbcustid ORDER BY - num_transactions DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql 
b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql index b44a2cd0c..044bef2b7 100644 --- a/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv10_sqlite.sql @@ -18,5 +18,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s3 AS _s3 ON _s3.sbcustid = sbcustomer.sbcustid ORDER BY - num_transactions DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv1_ansi.sql b/tests/test_sql_refsols/defog_broker_adv1_ansi.sql index d246ee325..78aa9bb78 100644 --- a/tests/test_sql_refsols/defog_broker_adv1_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv1_ansi.sql @@ -13,5 +13,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = sbcustomer.sbcustid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql index d246ee325..78aa9bb78 100644 --- a/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv1_sqlite.sql @@ -13,5 +13,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = sbcustomer.sbcustid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_adv2_ansi.sql b/tests/test_sql_refsols/defog_broker_adv2_ansi.sql index e8a256981..1e8577455 100644 --- a/tests/test_sql_refsols/defog_broker_adv2_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv2_ansi.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - tx_count DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql index 64bc9b567..d3563cf71 100644 --- a/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_broker_adv2_sqlite.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - tx_count DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 2 diff --git a/tests/test_sql_refsols/defog_broker_adv4_ansi.sql b/tests/test_sql_refsols/defog_broker_adv4_ansi.sql index 912d5618c..647c42c1b 100644 --- a/tests/test_sql_refsols/defog_broker_adv4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv4_ansi.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbdptickerid = sbticker.sbtickerid ORDER BY - price_change DESC + _s1.max_sbdphigh - _s1.min_sbdplow DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql index af0ff2bbd..1f79591d5 100644 --- a/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv4_sqlite.sql @@ -16,5 +16,5 @@ FROM main.sbticker AS sbticker LEFT JOIN _s1 AS _s1 ON _s1.sbdptickerid = sbticker.sbtickerid ORDER BY - price_change DESC + _s1.max_sbdphigh - _s1.min_sbdplow DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql index 81e55dbda..e419755e3 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_ansi.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid -), _t0 AS ( - SELECT - _s1.n_rows, - sbticker.sbtickersymbol, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount - FROM main.sbticker AS sbticker - LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid - ORDER BY - total_amount DESC - LIMIT 10 ) SELECT - sbtickersymbol AS symbol, - COALESCE(n_rows, 0) AS num_transactions, - total_amount -FROM _t0 + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS num_transactions, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM 
main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql index 81e55dbda..e419755e3 100644 --- a/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic3_sqlite.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.sbtransaction GROUP BY sbtxtickerid -), _t0 AS ( - SELECT - _s1.n_rows, - sbticker.sbtickersymbol, - COALESCE(_s1.sum_sbtxamount, 0) AS total_amount - FROM main.sbticker AS sbticker - LEFT JOIN _s1 AS _s1 - ON _s1.sbtxtickerid = sbticker.sbtickerid - ORDER BY - total_amount DESC - LIMIT 10 ) SELECT - sbtickersymbol AS symbol, - COALESCE(n_rows, 0) AS num_transactions, - total_amount -FROM _t0 + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS num_transactions, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid ORDER BY - total_amount DESC + COALESCE(_s1.sum_sbtxamount, 0) DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_broker_gen4_ansi.sql b/tests/test_sql_refsols/defog_broker_gen4_ansi.sql index 2f01c1125..7ee620788 100644 --- a/tests/test_sql_refsols/defog_broker_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_gen4_ansi.sql @@ -18,5 +18,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = sbcustomer.sbcustid ORDER BY - num_tx DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql b/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql index 1c616a29e..97950bc5b 100644 --- a/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_gen4_sqlite.sql @@ -16,5 +16,5 @@ FROM main.sbcustomer AS sbcustomer LEFT JOIN _s1 AS _s1 ON _s1.sbtxcustid = 
sbcustomer.sbcustid ORDER BY - num_tx DESC + COALESCE(_s1.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql index db538f89d..4e7ffe8fa 100644 --- a/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv16_ansi.sql @@ -15,5 +15,5 @@ FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - total DESC + COALESCE(_s1.sum_sale_price, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql index db538f89d..4e7ffe8fa 100644 --- a/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv16_sqlite.sql @@ -15,5 +15,5 @@ FROM main.salespersons AS salespersons LEFT JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - total DESC + COALESCE(_s1.sum_sale_price, 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql index 7bdd812ac..e29bcd389 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_ansi.sql @@ -8,24 +8,15 @@ WITH _s1 AS ( sale_date >= DATE_ADD(CURRENT_TIMESTAMP(), -3, 'MONTH') GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.salespersons AS salespersons - LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - total_revenue DESC - LIMIT 3 ) SELECT - first_name, - last_name, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS 
salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql index 280dd33d8..6c9b4e48c 100644 --- a/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic10_sqlite.sql @@ -8,24 +8,15 @@ WITH _s1 AS ( sale_date >= DATETIME('now', '-3 month') GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.salespersons AS salespersons - LEFT JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - total_revenue DESC - LIMIT 3 ) SELECT - first_name, - last_name, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 3134aaf0d..4239e292e 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -8,24 +8,15 @@ WITH _s1 AS ( DATEDIFF(CURRENT_TIMESTAMP(), CAST(sale_date AS DATETIME), DAY) <= 30 GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - _s1.sum_sale_price - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - n_rows DESC - LIMIT 5 ) SELECT - first_name, - last_name, - n_rows AS total_sales, - 
COALESCE(sum_sale_price, 0) AS total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - n_rows DESC + _s1.n_rows DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 4a046fb86..796cac46a 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -10,24 +10,15 @@ WITH _s1 AS ( ) AS INTEGER) <= 30 GROUP BY salesperson_id -), _t0 AS ( - SELECT - salespersons.first_name, - salespersons.last_name, - _s1.n_rows, - _s1.sum_sale_price - FROM main.salespersons AS salespersons - JOIN _s1 AS _s1 - ON _s1.salesperson_id = salespersons._id - ORDER BY - n_rows DESC - LIMIT 5 ) SELECT - first_name, - last_name, - n_rows AS total_sales, - COALESCE(sum_sale_price, 0) AS total_revenue -FROM _t0 + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id ORDER BY - n_rows DESC + _s1.n_rows DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql index b5c2b3306..92893c9eb 100644 --- a/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic6_ansi.sql @@ -8,5 +8,5 @@ JOIN main.customers AS customers GROUP BY customers.state ORDER BY - total_revenue DESC + COALESCE(SUM(sales.sale_price), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql index b5c2b3306..92893c9eb 100644 --- 
a/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic6_sqlite.sql @@ -8,5 +8,5 @@ JOIN main.customers AS customers GROUP BY customers.state ORDER BY - total_revenue DESC + COALESCE(SUM(sales.sale_price), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql index 1e9eee3e3..0326e6c44 100644 --- a/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic7_ansi.sql @@ -6,5 +6,5 @@ FROM main.payments_received GROUP BY payment_method ORDER BY - total_amount DESC + COALESCE(SUM(payment_amount), 0) DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql index 1e9eee3e3..0326e6c44 100644 --- a/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic7_sqlite.sql @@ -6,5 +6,5 @@ FROM main.payments_received GROUP BY payment_method ORDER BY - total_amount DESC + COALESCE(SUM(payment_amount), 0) DESC LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql index fe9f62139..ada0973e0 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_ansi.sql @@ -6,24 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id -), _t0 AS ( - SELECT - cars.make, - cars.model, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - ORDER BY - total_revenue DESC - LIMIT 5 ) SELECT - make, - model, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = 
cars._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql index fe9f62139..ada0973e0 100644 --- a/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic8_sqlite.sql @@ -6,24 +6,15 @@ WITH _s1 AS ( FROM main.sales GROUP BY car_id -), _t0 AS ( - SELECT - cars.make, - cars.model, - _s1.n_rows, - COALESCE(_s1.sum_sale_price, 0) AS total_revenue - FROM main.cars AS cars - LEFT JOIN _s1 AS _s1 - ON _s1.car_id = cars._id - ORDER BY - total_revenue DESC - LIMIT 5 ) SELECT - make, - model, - COALESCE(n_rows, 0) AS total_sales, - total_revenue -FROM _t0 + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id ORDER BY - total_revenue DESC + COALESCE(_s1.sum_sale_price, 0) DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql index b52fa5282..6c664437a 100644 --- a/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen1_ansi.sql @@ -7,5 +7,5 @@ FROM main.salespersons WHERE NOT termination_date IS NULL ORDER BY - days_employed + DATEDIFF(CAST(termination_date AS DATETIME), CAST(hire_date AS DATETIME), DAY) * 1.0 LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql index e76006a10..2e5c523e7 100644 --- a/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen1_sqlite.sql @@ -9,5 +9,7 @@ FROM main.salespersons WHERE NOT termination_date IS NULL ORDER BY - days_employed + CAST(( + JULIANDAY(DATE(termination_date, 'start of day')) - JULIANDAY(DATE(hire_date, 'start of day')) + 
) AS INTEGER) * 1.0 LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql b/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql index 64d4b7ee5..913d74c46 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv15_ansi.sql @@ -17,5 +17,5 @@ FROM main.merchants AS merchants LEFT JOIN _s3 AS _s3 ON _s3.merchant_id = merchants.mid ORDER BY - coupons_per_merchant DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql index 44196293b..1770ea1c5 100644 --- a/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_adv15_sqlite.sql @@ -21,5 +21,5 @@ FROM main.merchants AS merchants LEFT JOIN _s3 AS _s3 ON _s3.merchant_id = merchants.mid ORDER BY - coupons_per_merchant DESC + COALESCE(_s3.n_rows, 0) DESC LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql index 5e675ed08..d3c2d33ec 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_ansi.sql @@ -9,22 +9,14 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id -), _t0 AS ( - SELECT - _s1.n_rows, - merchants.name, - COALESCE(_s1.sum_amount, 0) AS total_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - ORDER BY - total_amount DESC - LIMIT 2 ) SELECT - name AS merchant_name, - COALESCE(n_rows, 0) AS total_transactions, - total_amount -FROM _t0 + merchants.name AS merchant_name, + COALESCE(_s1.n_rows, 0) AS total_transactions, + COALESCE(_s1.sum_amount, 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid ORDER BY - total_amount DESC + COALESCE(_s1.sum_amount, 0) DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql 
b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql index cd313570f..f50965205 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic10_sqlite.sql @@ -9,22 +9,14 @@ WITH _s1 AS ( AND receiver_type = 1 GROUP BY receiver_id -), _t0 AS ( - SELECT - _s1.n_rows, - merchants.name, - COALESCE(_s1.sum_amount, 0) AS total_amount - FROM main.merchants AS merchants - LEFT JOIN _s1 AS _s1 - ON _s1.receiver_id = merchants.mid - ORDER BY - total_amount DESC - LIMIT 2 ) SELECT - name AS merchant_name, - COALESCE(n_rows, 0) AS total_transactions, - total_amount -FROM _t0 + merchants.name AS merchant_name, + COALESCE(_s1.n_rows, 0) AS total_transactions, + COALESCE(_s1.sum_amount, 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid ORDER BY - total_amount DESC + COALESCE(_s1.sum_amount, 0) DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql index 313c5c222..ffd7b6130 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_ansi.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id -), _t0 AS ( - SELECT - coupons.code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - _s1.sum_amount - FROM main.coupons AS coupons - LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid - ORDER BY - redemption_count DESC - LIMIT 3 ) SELECT - code AS coupon_code, - redemption_count, - COALESCE(sum_amount, 0) AS total_discount -FROM _t0 + coupons.code AS coupon_code, + COALESCE(_s1.count_txid, 0) AS redemption_count, + COALESCE(_s1.sum_amount, 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid ORDER BY - redemption_count DESC + COALESCE(_s1.count_txid, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql 
b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql index 313c5c222..ffd7b6130 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic8_sqlite.sql @@ -6,22 +6,14 @@ WITH _s1 AS ( FROM main.wallet_transactions_daily GROUP BY coupon_id -), _t0 AS ( - SELECT - coupons.code, - COALESCE(_s1.count_txid, 0) AS redemption_count, - _s1.sum_amount - FROM main.coupons AS coupons - LEFT JOIN _s1 AS _s1 - ON _s1.coupon_id = coupons.cid - ORDER BY - redemption_count DESC - LIMIT 3 ) SELECT - code AS coupon_code, - redemption_count, - COALESCE(sum_amount, 0) AS total_discount -FROM _t0 + coupons.code AS coupon_code, + COALESCE(_s1.count_txid, 0) AS redemption_count, + COALESCE(_s1.sum_amount, 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid ORDER BY - redemption_count DESC + COALESCE(_s1.count_txid, 0) DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql b/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql index d10bc2ac9..32cdfea28 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_ansi.sql @@ -16,5 +16,5 @@ WHERE GROUP BY _s1.country ORDER BY - total_amount DESC + COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql b/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql index d10bc2ac9..32cdfea28 100644 --- a/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql +++ b/tests/test_sql_refsols/defog_ewallet_basic9_sqlite.sql @@ -16,5 +16,5 @@ WHERE GROUP BY _s1.country ORDER BY - total_amount DESC + COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql index 5d9adaf39..f9e924c2b 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql 
+++ b/tests/test_sql_refsols/epoch_culture_events_info_ansi.sql @@ -3,40 +3,30 @@ WITH _s2 AS ( ev_dt, ev_key FROM events -), _t0 AS ( - SELECT - eras.er_name, - events.ev_dt, - events.ev_name, - seasons.s_name, - times.t_name - FROM events AS events - JOIN eras AS eras - ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) - AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) - JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key - JOIN seasons AS seasons - ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) - JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key - JOIN times AS times - ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) - AND times.t_start_hour <= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) - WHERE - events.ev_typ = 'culture' - ORDER BY - ev_dt - LIMIT 6 ) SELECT - ev_name AS event_name, - er_name AS era_name, - EXTRACT(YEAR FROM CAST(ev_dt AS DATETIME)) AS event_year, - s_name AS season_name, - t_name AS tod -FROM _t0 + events.ev_name AS event_name, + eras.er_name AS era_name, + EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) AS event_year, + seasons.s_name AS season_name, + times.t_name AS tod +FROM events AS events +JOIN eras AS eras + ON eras.er_end_year > EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) + AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) +JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key +JOIN seasons AS seasons + ON seasons.s_month1 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month2 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) + OR seasons.s_month3 = EXTRACT(MONTH FROM CAST(_s2.ev_dt AS DATETIME)) +JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key +JOIN times AS times + ON times.t_end_hour > EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) + AND times.t_start_hour 
<= EXTRACT(HOUR FROM CAST(_s6.ev_dt AS DATETIME)) +WHERE + events.ev_typ = 'culture' ORDER BY - ev_dt + events.ev_dt +LIMIT 6 diff --git a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql index 1cbe48ccc..25f7ff28f 100644 --- a/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql +++ b/tests/test_sql_refsols/epoch_culture_events_info_sqlite.sql @@ -3,40 +3,30 @@ WITH _s2 AS ( ev_dt, ev_key FROM events -), _t0 AS ( - SELECT - eras.er_name, - events.ev_dt, - events.ev_name, - seasons.s_name, - times.t_name - FROM events AS events - JOIN eras AS eras - ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) - AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) - JOIN _s2 AS _s2 - ON _s2.ev_key = events.ev_key - JOIN seasons AS seasons - ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) - JOIN _s2 AS _s6 - ON _s6.ev_key = events.ev_key - JOIN times AS times - ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) - AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) - WHERE - events.ev_typ = 'culture' - ORDER BY - ev_dt - LIMIT 6 ) SELECT - ev_name AS event_name, - er_name AS era_name, - CAST(STRFTIME('%Y', ev_dt) AS INTEGER) AS event_year, - s_name AS season_name, - t_name AS tod -FROM _t0 + events.ev_name AS event_name, + eras.er_name AS era_name, + CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) AS event_year, + seasons.s_name AS season_name, + times.t_name AS tod +FROM events AS events +JOIN eras AS eras + ON eras.er_end_year > CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) + AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) +JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key +JOIN seasons AS seasons + ON seasons.s_month1 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) 
+ OR seasons.s_month2 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) + OR seasons.s_month3 = CAST(STRFTIME('%m', _s2.ev_dt) AS INTEGER) +JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key +JOIN times AS times + ON times.t_end_hour > CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) + AND times.t_start_hour <= CAST(STRFTIME('%H', _s6.ev_dt) AS INTEGER) +WHERE + events.ev_typ = 'culture' ORDER BY - ev_dt + events.ev_dt +LIMIT 6 diff --git a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql index 30ed1ca1b..cd2c4a531 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_ansi.sql @@ -1,19 +1,9 @@ -WITH _t0 AS ( - SELECT - ps_availqty, - ps_partkey, - ps_suppkey, - CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost - FROM tpch.partsupp - ORDER BY - total_cost DESC - LIMIT 10 -) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, FLOOR(ps_availqty) AS complete_parts, - total_cost -FROM _t0 + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost +FROM tpch.partsupp ORDER BY - total_cost DESC + CEIL(ps_supplycost * FLOOR(ps_availqty)) DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql index 95f227be8..6db785108 100644 --- a/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql +++ b/tests/test_sql_refsols/floor_and_ceil_2_sqlite.sql @@ -1,29 +1,29 @@ -WITH _t0 AS ( - SELECT - ps_availqty, - ps_partkey, - ps_suppkey, - CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) + CASE - WHEN CAST(ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) AS INTEGER) < ps_supplycost * ( - CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END - ) - THEN 1 - ELSE 0 - END AS total_cost - FROM 
tpch.partsupp - ORDER BY - total_cost DESC - LIMIT 10 -) SELECT ps_suppkey AS supplier_key, ps_partkey AS part_key, CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END AS complete_parts, - total_cost -FROM _t0 + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END AS total_cost +FROM tpch.partsupp ORDER BY - total_cost DESC + CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) + CASE + WHEN CAST(ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) AS INTEGER) < ps_supplycost * ( + CAST(ps_availqty AS INTEGER) - CASE WHEN ps_availqty < CAST(ps_availqty AS INTEGER) THEN 1 ELSE 0 END + ) + THEN 1 + ELSE 0 + END DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql index b6fe25cdb..b4a7156ba 100644 --- a/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_decode3_sqlite.sql @@ -1,14 +1,3 @@ -WITH _t0 AS ( - SELECT - o_orderkey, - o_orderpriority - FROM tpch.orders - WHERE - o_clerk = 'Clerk#000000951' - ORDER BY - o_orderkey - LIMIT 10 -) SELECT o_orderkey AS key, CASE @@ -20,6 +9,9 @@ SELECT THEN 'C' ELSE 'D' END AS val -FROM _t0 +FROM tpch.orders +WHERE + o_clerk = 'Clerk#000000951' ORDER BY o_orderkey +LIMIT 10 diff --git a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql 
b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql index 7d81bf0ec..df8c90691 100644 --- a/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_format_datetime_sqlite.sql @@ -1,19 +1,10 @@ -WITH _t0 AS ( - SELECT - o_orderdate, - o_orderkey, - o_totalprice - FROM tpch.orders - ORDER BY - o_totalprice - LIMIT 5 -) SELECT o_orderkey AS key, STRFTIME('%d/%m/%Y', o_orderdate) AS d1, STRFTIME('%Y:%j', o_orderdate) AS d2, CAST(STRFTIME('%s', o_orderdate) AS INTEGER) AS d3, CAST(STRFTIME('%Y%m%d', o_orderdate, '+39 days', 'start of month') AS INTEGER) AS d4 -FROM _t0 +FROM tpch.orders ORDER BY o_totalprice +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql index da20e4054..954197d6d 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ir DESC, + ROUND(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql index 20695a77b..5462388f5 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ir DESC, + ROUND(CAST(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql index 
bea21073d..c05d32834 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_ansi.sql @@ -42,5 +42,7 @@ CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 ORDER BY - ir DESC + ROUND(( + 1.0 * COALESCE(_s9.sum_n_rows, 0) + ) / COALESCE(_s9.n_rows, 0), 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql index e460f8540..6cc634740 100644 --- a/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_sqlite.sql @@ -42,5 +42,7 @@ CROSS JOIN _s0 AS _s1 LEFT JOIN _s9 AS _s9 ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 ORDER BY - ir DESC + ROUND(CAST(( + 1.0 * COALESCE(_s9.sum_n_rows, 0) + ) AS REAL) / COALESCE(_s9.n_rows, 0), 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index d4bcd9d17..276282dbe 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -27,5 +27,5 @@ FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ir DESC + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 5a99bc7fc..60fd910bb 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -27,5 +27,5 @@ FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - 
ir DESC + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/tpch_q10_ansi.sql b/tests/test_sql_refsols/tpch_q10_ansi.sql index 912de751d..f0a2699a0 100644 --- a/tests/test_sql_refsols/tpch_q10_ansi.sql +++ b/tests/test_sql_refsols/tpch_q10_ansi.sql @@ -28,6 +28,6 @@ LEFT JOIN _s3 AS _s3 JOIN tpch.nation AS nation ON customer.c_nationkey = nation.n_nationkey ORDER BY - revenue DESC, + COALESCE(_s3.sum_expr_1, 0) DESC, c_custkey LIMIT 20 diff --git a/tests/test_sql_refsols/tpch_q10_sqlite.sql b/tests/test_sql_refsols/tpch_q10_sqlite.sql index 20a1b7a34..7e5943713 100644 --- a/tests/test_sql_refsols/tpch_q10_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q10_sqlite.sql @@ -41,6 +41,6 @@ LEFT JOIN _s3 AS _s3 JOIN tpch.nation AS nation ON customer.c_nationkey = nation.n_nationkey ORDER BY - revenue DESC, + COALESCE(_s3.sum_expr_1, 0) DESC, c_custkey LIMIT 20 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 64803c51d..67f82c489 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -16,7 +16,7 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _t1 AS ( +), _t2 AS ( SELECT COUNT(*) AS n_rows, partsupp.ps_suppkey @@ -35,8 +35,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t1 AS _t1 - ON _t1.n_rows > 0 AND _t1.ps_suppkey = supplier.s_suppkey +JOIN _t2 AS _t2 + ON _t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index e5b221d69..c7f3110d7 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -16,7 +16,7 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' 
-), _t1 AS ( +), _t2 AS ( SELECT COUNT(*) AS n_rows, partsupp.ps_suppkey @@ -35,8 +35,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t1 AS _t1 - ON _t1.n_rows > 0 AND _t1.ps_suppkey = supplier.s_suppkey +JOIN _t2 AS _t2 + ON _t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q21_ansi.sql b/tests/test_sql_refsols/tpch_q21_ansi.sql index 88ec9d9f7..b57b17e13 100644 --- a/tests/test_sql_refsols/tpch_q21_ansi.sql +++ b/tests/test_sql_refsols/tpch_q21_ansi.sql @@ -1,4 +1,4 @@ -WITH _t7 AS ( +WITH _t6 AS ( SELECT l_commitdate, l_linenumber, @@ -8,47 +8,47 @@ WITH _t7 AS ( FROM tpch.lineitem WHERE l_commitdate < l_receiptdate -), _t4 AS ( +), _t3 AS ( SELECT - ANY_VALUE(_t7.l_linenumber) AS anything_l_linenumber, - ANY_VALUE(_t7.l_orderkey) AS anything_l_orderkey, - ANY_VALUE(_t7.l_suppkey) AS anything_l_suppkey, + ANY_VALUE(_t6.l_linenumber) AS anything_l_linenumber, + ANY_VALUE(_t6.l_orderkey) AS anything_l_orderkey, + ANY_VALUE(_t6.l_suppkey) AS anything_l_suppkey, ANY_VALUE(orders.o_orderkey) AS anything_o_orderkey, ANY_VALUE(orders.o_orderstatus) AS anything_o_orderstatus - FROM _t7 AS _t7 + FROM _t6 AS _t6 JOIN tpch.orders AS orders - ON _t7.l_orderkey = orders.o_orderkey + ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t7.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey + ON _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey GROUP BY - _t7.l_linenumber, - _t7.l_orderkey, + _t6.l_linenumber, + _t6.l_orderkey, orders.o_orderkey ), _s11 AS ( SELECT - _t9.l_linenumber, - _t9.l_orderkey, + _t8.l_linenumber, + _t8.l_orderkey, orders.o_orderkey - FROM _t7 AS _t9 + FROM _t6 AS _t8 JOIN tpch.orders AS orders - ON _t9.l_orderkey = orders.o_orderkey + ON _t8.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - 
ON _t9.l_suppkey <> lineitem.l_suppkey + ON _t8.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_orderkey = orders.o_orderkey ), _s13 AS ( SELECT COUNT(*) AS n_rows, - _t4.anything_l_suppkey - FROM _t4 AS _t4 + _t3.anything_l_suppkey + FROM _t3 AS _t3 JOIN _s11 AS _s11 - ON _s11.l_linenumber = _t4.anything_l_linenumber - AND _s11.l_orderkey = _t4.anything_l_orderkey - AND _s11.o_orderkey = _t4.anything_o_orderkey + ON _s11.l_linenumber = _t3.anything_l_linenumber + AND _s11.l_orderkey = _t3.anything_l_orderkey + AND _s11.o_orderkey = _t3.anything_o_orderkey WHERE - _t4.anything_o_orderstatus = 'F' + _t3.anything_o_orderstatus = 'F' GROUP BY - _t4.anything_l_suppkey + _t3.anything_l_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -59,6 +59,6 @@ JOIN tpch.nation AS nation LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey ORDER BY - numwait DESC, + COALESCE(_s13.n_rows, 0) DESC, s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q21_sqlite.sql b/tests/test_sql_refsols/tpch_q21_sqlite.sql index fd7da4b57..7e0ec8787 100644 --- a/tests/test_sql_refsols/tpch_q21_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q21_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t7 AS ( +WITH _t6 AS ( SELECT l_commitdate, l_linenumber, @@ -8,51 +8,51 @@ WITH _t7 AS ( FROM tpch.lineitem WHERE l_commitdate < l_receiptdate -), _t4 AS ( +), _t3 AS ( SELECT - MAX(_t7.l_linenumber) AS anything_l_linenumber, - MAX(_t7.l_orderkey) AS anything_l_orderkey, - MAX(_t7.l_suppkey) AS anything_l_suppkey, + MAX(_t6.l_linenumber) AS anything_l_linenumber, + MAX(_t6.l_orderkey) AS anything_l_orderkey, + MAX(_t6.l_suppkey) AS anything_l_suppkey, MAX(orders.o_orderkey) AS anything_o_orderkey, MAX(orders.o_orderstatus) AS anything_o_orderstatus - FROM _t7 AS _t7 + FROM _t6 AS _t6 JOIN tpch.orders AS orders - ON _t7.l_orderkey = orders.o_orderkey + ON _t6.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t7.l_suppkey <> 
lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey + ON _t6.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey GROUP BY - _t7.l_linenumber, - _t7.l_orderkey, + _t6.l_linenumber, + _t6.l_orderkey, orders.o_orderkey ), _u_0 AS ( SELECT - _t9.l_linenumber AS _u_1, - _t9.l_orderkey AS _u_2, + _t8.l_linenumber AS _u_1, + _t8.l_orderkey AS _u_2, orders.o_orderkey AS _u_3 - FROM _t7 AS _t9 + FROM _t6 AS _t8 JOIN tpch.orders AS orders - ON _t9.l_orderkey = orders.o_orderkey + ON _t8.l_orderkey = orders.o_orderkey JOIN tpch.lineitem AS lineitem - ON _t9.l_suppkey <> lineitem.l_suppkey + ON _t8.l_suppkey <> lineitem.l_suppkey AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_orderkey = orders.o_orderkey GROUP BY - _t9.l_linenumber, - _t9.l_orderkey, + _t8.l_linenumber, + _t8.l_orderkey, orders.o_orderkey ), _s13 AS ( SELECT COUNT(*) AS n_rows, - _t4.anything_l_suppkey - FROM _t4 AS _t4 + _t3.anything_l_suppkey + FROM _t3 AS _t3 LEFT JOIN _u_0 AS _u_0 - ON _t4.anything_l_linenumber = _u_0._u_1 - AND _t4.anything_l_orderkey = _u_0._u_2 - AND _t4.anything_o_orderkey = _u_0._u_3 + ON _t3.anything_l_linenumber = _u_0._u_1 + AND _t3.anything_l_orderkey = _u_0._u_2 + AND _t3.anything_o_orderkey = _u_0._u_3 WHERE - _t4.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL + _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL GROUP BY - _t4.anything_l_suppkey + _t3.anything_l_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -63,6 +63,6 @@ JOIN tpch.nation AS nation LEFT JOIN _s13 AS _s13 ON _s13.anything_l_suppkey = supplier.s_suppkey ORDER BY - numwait DESC, + COALESCE(_s13.n_rows, 0) DESC, s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q2_ansi.sql b/tests/test_sql_refsols/tpch_q2_ansi.sql index 003040077..07e6da4ff 100644 --- a/tests/test_sql_refsols/tpch_q2_ansi.sql +++ b/tests/test_sql_refsols/tpch_q2_ansi.sql @@ -1,4 +1,4 @@ -WITH _t0 AS ( +WITH _t1 AS ( SELECT nation.n_name, part.p_mfgr, @@ -31,7 +31,7 @@ SELECT 
s_address AS S_ADDRESS, s_phone AS S_PHONE, s_comment AS S_COMMENT -FROM _t0 +FROM _t1 ORDER BY s_acctbal DESC, n_name, diff --git a/tests/test_sql_refsols/tpch_q3_ansi.sql b/tests/test_sql_refsols/tpch_q3_ansi.sql index f50a43d15..6511e3f13 100644 --- a/tests/test_sql_refsols/tpch_q3_ansi.sql +++ b/tests/test_sql_refsols/tpch_q3_ansi.sql @@ -18,7 +18,9 @@ GROUP BY orders.o_orderdate, orders.o_shippriority ORDER BY - revenue DESC, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC, o_orderdate, l_orderkey LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q3_sqlite.sql b/tests/test_sql_refsols/tpch_q3_sqlite.sql index 7fa133e3b..d23483f4d 100644 --- a/tests/test_sql_refsols/tpch_q3_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q3_sqlite.sql @@ -17,7 +17,9 @@ GROUP BY orders.o_orderdate, orders.o_shippriority ORDER BY - revenue DESC, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) DESC, o_orderdate, l_orderkey LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q9_ansi.sql b/tests/test_sql_refsols/tpch_q9_ansi.sql index 99390991a..12fe871b0 100644 --- a/tests/test_sql_refsols/tpch_q9_ansi.sql +++ b/tests/test_sql_refsols/tpch_q9_ansi.sql @@ -1,37 +1,30 @@ -WITH _t0 AS ( - SELECT +SELECT + nation.n_name AS NATION, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS O_YEAR, + COALESCE( SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ) AS sum_value, - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) AS o_year - FROM tpch.lineitem AS lineitem - JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' - JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey - JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey - JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey - JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = 
partsupp.ps_partkey - AND lineitem.l_suppkey = partsupp.ps_suppkey - GROUP BY - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) - ORDER BY - n_name, - o_year DESC - LIMIT 10 -) -SELECT - n_name AS NATION, - o_year AS O_YEAR, - COALESCE(sum_value, 0) AS AMOUNT -FROM _t0 + ), + 0 + ) AS AMOUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey +GROUP BY + nation.n_name, + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) ORDER BY - n_name, + nation.n_name, o_year DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q9_sqlite.sql b/tests/test_sql_refsols/tpch_q9_sqlite.sql index ac17a15d1..37e726db9 100644 --- a/tests/test_sql_refsols/tpch_q9_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q9_sqlite.sql @@ -1,37 +1,30 @@ -WITH _t0 AS ( - SELECT +SELECT + nation.n_name AS NATION, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS O_YEAR, + COALESCE( SUM( lineitem.l_extendedprice * ( 1 - lineitem.l_discount ) - partsupp.ps_supplycost * lineitem.l_quantity - ) AS sum_value, - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) AS o_year - FROM tpch.lineitem AS lineitem - JOIN tpch.part AS part - ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' - JOIN tpch.supplier AS supplier - ON lineitem.l_suppkey = supplier.s_suppkey - JOIN tpch.nation AS nation - ON nation.n_nationkey = supplier.s_nationkey - JOIN tpch.orders AS orders - ON lineitem.l_orderkey = orders.o_orderkey - JOIN tpch.partsupp AS partsupp - ON lineitem.l_partkey = partsupp.ps_partkey - AND lineitem.l_suppkey = 
partsupp.ps_suppkey - GROUP BY - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) - ORDER BY - n_name, - o_year DESC - LIMIT 10 -) -SELECT - n_name AS NATION, - o_year AS O_YEAR, - COALESCE(sum_value, 0) AS AMOUNT -FROM _t0 + ), + 0 + ) AS AMOUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey +GROUP BY + nation.n_name, + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) ORDER BY - n_name, + nation.n_name, o_year DESC +LIMIT 10 From 665a9dd3d5c325d9e0d0a7533c9c40b962126a5a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 13:23:23 -0400 Subject: [PATCH 15/97] Restoring filter modifications --- pydough/conversion/projection_pullup.py | 3 ++ tests/test_plan_refsols/bad_child_reuse_2.txt | 6 +-- tests/test_plan_refsols/bad_child_reuse_3.txt | 6 +-- tests/test_plan_refsols/common_prefix_n.txt | 6 +-- tests/test_plan_refsols/common_prefix_o.txt | 6 +-- tests/test_plan_refsols/correl_14.txt | 30 +++++------ tests/test_plan_refsols/correl_15.txt | 36 ++++++------- tests/test_plan_refsols/correl_18.txt | 17 +++--- tests/test_plan_refsols/correl_20.txt | 26 +++++---- tests/test_plan_refsols/correl_24.txt | 14 ++--- .../month_year_sliding_windows.txt | 29 +++++----- .../multi_partition_access_6.txt | 53 +++++++++---------- .../technograph_monthly_incident_rate.txt | 51 +++++++++--------- ..._year_cumulative_incident_rate_overall.txt | 6 +-- tests/test_plan_refsols/tpch_q11.txt | 4 +- tests/test_plan_refsols/tpch_q20.txt | 19 ++++--- tests/test_plan_refsols/tpch_q22.txt | 6 +-- .../window_filter_order_10.txt | 13 
+++-- ...technograph_monthly_incident_rate_ansi.sql | 34 ++++++------ ...chnograph_monthly_incident_rate_sqlite.sql | 34 ++++++------ tests/test_sql_refsols/tpch_q11_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q11_sqlite.sql | 4 +- 22 files changed, 200 insertions(+), 207 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 768944058..b126550ad 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -243,6 +243,9 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: if node.join_type == JoinType.INNER: pull_project_into_join(node, 1) return pull_non_columns(node) + case Filter(): + pull_project_into_filter(node) + return pull_non_columns(node) case Limit(): pull_project_into_limit(node) return pull_non_columns(node) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 9aa529377..f9274e077 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 
'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 9aa529377..f9274e077 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_orders': n_orders}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'n_rows': n_rows}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 
'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index fc3a3530b..0235aa2fa 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_small_parts': DEFAULT_TO(sum_agg_11, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_p_retailprice, 0:numeric)}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 
'sum_p_retailprice': sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 1916a60a9..808be5ed1 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', total_retail_price), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': 
total_retail_price}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'total_retail_price': DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) 
FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index c5da9fde3..feffdfb1a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,19 +1,17 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': 
SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index aa5568ea8..6d7af3f9c 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,22 +1,20 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) - PROJECT(columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'supplier_avg_price': sum_p_retailprice / sum_expr_1}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric, columns={'p_retailprice': p_retailprice, 's_suppkey': s_suppkey, 'sum_expr_1': sum_expr_1, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': 
t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) + PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index e34eb6923..5f6ca684d 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,12 +1,11 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': SUM(n_above_avg)}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * total_price_sum, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - PROJECT(columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'total_price_sum': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) 
== 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 8c61c789f..a480ec3e3 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,16 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=domestic, columns={}) - PROJECT(columns={'domestic': name_16 == n_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'name_16': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index af102dee8..1217865d6 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -4,9 +4,11 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': 
MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=year < 1994:numeric, columns={'month': month, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 69a863b05..d438e6e1e 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,16 +1,15 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=month_total_spent > NEXT(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & month_total_spent > PREV(args=[month_total_spent], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 
'year': year}) - PROJECT(columns={'month': month, 'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=curr_year_total_spent > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'curr_year_total_spent': DEFAULT_TO(sum_month_total_spent, 0:numeric), 'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > 
PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) + PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 05b282309..5ee7193e5 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,26 +2,25 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_cust_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - PROJECT(columns={'n_cust_trans': DEFAULT_TO(sum_n_cust_type_trans, 0:numeric), 'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, 
columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': 
sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -41,21 +40,19 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) - PROJECT(columns={'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=n_ticker_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': 
sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 6f30065aa..8aa3a87db 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,29 +1,28 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_3': t0.n_rows, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'month': month, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, 
aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'month': MONTH(ca_dt), 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': 
t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == 
t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 25ad10b87..4c754cbe5 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[n_incidents], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * n_incidents - PREV(args=[n_incidents], partition=[], order=[(year):asc_last]) / PREV(args=[n_incidents], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', n_incidents)], orderings=[(year):asc_first]) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 
'n_incidents': n_incidents, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'n_incidents': DEFAULT_TO(sum_n_rows, 0:numeric), 'year': year}) +ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) + FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) + PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 1c7b826c2..633b50afb 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last], limit=10:numeric) - FILTER(condition=VALUE > min_market_share, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - 
PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'min_market_share': DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, 'ps_partkey': ps_partkey}) + FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) + PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) PROJECT(columns={'metric': ps_supplycost * ps_availqty}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index fc41ee839..245e3ef0e 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -6,13 +6,12 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(part_qty, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - PROJECT(columns={'part_qty': DEFAULT_TO(sum_l_quantity, 0:numeric), 'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), 
columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index ff832eb90..60e4e77d6 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -7,10 +7,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) JOIN(condition=True:bool, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - PROJECT(columns={'c_acctbal': c_acctbal, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 96e986806..28100ecde 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,9 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[NULL_1], partition=[], order=[]), columns={}) - PROJECT(columns={'NULL_1': None:unknown, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) - FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - 
SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 3c42321e0..a80b170ef 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t4 AS ( SELECT ca_dt FROM main.calendar @@ -15,7 +15,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, _t6.ca_dt - FROM _t3 AS _t6 + FROM _t4 AS _t6 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATE_ADD(CAST(_t6.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices @@ -27,25 +27,25 @@ WITH _t3 AS ( ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t10.ca_dt - FROM _t3 AS _t10 + _t9.ca_dt + FROM _t4 AS _t9 JOIN main.incidents AS incidents - ON _t10.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t9.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN 
_t7 AS _t11 - ON _t11.co_id = devices.de_production_country_id + JOIN _t7 AS _t10 + ON _t10.co_id = devices.de_production_country_id GROUP BY - _t10.ca_dt + _t9.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t3 AS _t3 +FROM _t4 AS _t4 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = _t4.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t4.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index 1c08c7d2e..de7f0b427 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t3 AS ( +WITH _t4 AS ( SELECT ca_dt FROM main.calendar @@ -15,7 +15,7 @@ WITH _t3 AS ( SELECT COUNT(*) AS n_rows, _t6.ca_dt - FROM _t3 AS _t6 + FROM _t4 AS _t6 JOIN main.calendar AS calendar ON calendar.ca_dt >= DATETIME(_t6.ca_dt, '-6 month') JOIN main.devices AS devices @@ -27,25 +27,25 @@ WITH _t3 AS ( ), _s15 AS ( SELECT COUNT(*) AS n_rows, - 
_t10.ca_dt - FROM _t3 AS _t10 + _t9.ca_dt + FROM _t4 AS _t9 JOIN main.incidents AS incidents - ON _t10.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t9.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t11 - ON _t11.co_id = devices.de_production_country_id + JOIN _t7 AS _t10 + ON _t10.co_id = devices.de_production_country_id GROUP BY - _t10.ca_dt + _t9.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t3 AS _t3 +FROM _t4 AS _t4 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = _t4.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t4.ca_dt GROUP BY - CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) + CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index 53460ad26..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -35,7 +35,9 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 diff --git 
a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index 53460ad26..f76f36d96 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -35,7 +35,9 @@ SELECT COALESCE(_s9.sum_expr_2, 0) AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 - ON COALESCE(_s8.sum_metric, 0) * 0.0001 < COALESCE(_s9.sum_expr_2, 0) + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY value DESC LIMIT 10 From d1fe25bf9f281c3adc49bbffa605f5af51211862 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 13:45:44 -0400 Subject: [PATCH 16/97] Started aggregation project pullup --- pydough/conversion/agg_split.py | 2 +- pydough/conversion/column_bubbler.py | 9 ++-- pydough/conversion/projection_pullup.py | 53 +++++++++++++++++++ pydough/conversion/relational_converter.py | 2 +- .../relational/relational_nodes/aggregate.py | 16 +++--- .../agg_orders_by_year_month_basic.txt | 5 +- .../agg_orders_by_year_month_just_europe.txt | 24 ++++----- .../agg_orders_by_year_month_vs_europe.txt | 24 ++++----- tests/test_plan_refsols/agg_partition.txt | 5 +- .../aggregate_mixed_levels_simple.txt | 9 ++-- .../aggregate_on_function_call.txt | 5 +- .../aggregation_analytics_1.txt | 23 ++++---- .../aggregation_analytics_2.txt | 23 ++++---- .../aggregation_analytics_3.txt | 23 ++++---- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 5 +- tests/test_plan_refsols/common_prefix_a.txt | 11 ++-- tests/test_plan_refsols/common_prefix_ag.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_ah.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_ai.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_aj.txt | 41 +++++++------- tests/test_plan_refsols/common_prefix_b.txt | 19 ++++--- tests/test_plan_refsols/common_prefix_c.txt | 32 ++++++----- tests/test_plan_refsols/common_prefix_d.txt | 35 ++++++------ tests/test_plan_refsols/common_prefix_e.txt | 11 ++-- 
tests/test_plan_refsols/common_prefix_f.txt | 19 ++++--- tests/test_plan_refsols/common_prefix_g.txt | 19 ++++--- tests/test_plan_refsols/common_prefix_h.txt | 32 ++++++----- tests/test_plan_refsols/correl_14.txt | 9 ++-- tests/test_plan_refsols/correl_15.txt | 9 ++-- tests/test_plan_refsols/correl_24.txt | 9 ++-- tests/test_plan_refsols/correl_26.txt | 27 +++++----- tests/test_plan_refsols/correl_27.txt | 25 +++++---- tests/test_plan_refsols/correl_28.txt | 21 ++++---- tests/test_plan_refsols/correl_30.txt | 29 +++++----- tests/test_plan_refsols/correl_31.txt | 31 ++++++----- .../count_cust_supplier_nation_combos.txt | 19 ++++--- .../customer_largest_order_deltas.txt | 7 ++- tests/test_plan_refsols/double_partition.txt | 5 +- .../epoch_intra_season_searches.txt | 42 +++++++-------- .../global_acctbal_breakdown.txt | 5 +- .../highest_priority_per_year.txt | 5 +- .../month_year_sliding_windows.txt | 17 +++--- .../nation_acctbal_breakdown.txt | 5 +- .../odate_and_rdate_avggap.txt | 11 ++-- .../region_acctbal_breakdown.txt | 9 ++-- .../simple_var_std_with_nulls.txt | 7 ++- .../sqlite_udf_combine_strings.txt | 19 +++---- .../sqlite_udf_covar_pop.txt | 17 +++--- tests/test_plan_refsols/sqlite_udf_nested.txt | 15 +++--- .../sqlite_udf_percent_epsilon.txt | 9 ++-- .../sqlite_udf_percent_positive.txt | 16 +++--- tests/test_plan_refsols/sqlite_udf_relmin.txt | 7 ++- ...ograph_battery_failure_rates_anomalies.txt | 25 +++++---- ..._error_rate_sun_set_by_factory_country.txt | 17 +++--- ...hnograph_incident_rate_by_release_year.txt | 22 ++++---- .../technograph_incident_rate_per_brand.txt | 15 +++--- .../technograph_monthly_incident_rate.txt | 4 +- .../technograph_most_unreliable_products.txt | 15 +++--- ...umulative_incident_rate_goldcopperstar.txt | 4 +- ..._year_cumulative_incident_rate_overall.txt | 4 +- tests/test_plan_refsols/tpch_q1.txt | 7 ++- tests/test_plan_refsols/tpch_q10.txt | 13 +++-- tests/test_plan_refsols/tpch_q11.txt | 30 +++++------ 
tests/test_plan_refsols/tpch_q12.txt | 11 ++-- tests/test_plan_refsols/tpch_q13.txt | 13 +++-- tests/test_plan_refsols/tpch_q14.txt | 11 ++-- tests/test_plan_refsols/tpch_q15.txt | 21 ++++---- tests/test_plan_refsols/tpch_q19.txt | 13 +++-- tests/test_plan_refsols/tpch_q3.txt | 19 ++++--- tests/test_plan_refsols/tpch_q5.txt | 33 ++++++------ tests/test_plan_refsols/tpch_q6.txt | 7 ++- tests/test_plan_refsols/tpch_q7.txt | 4 +- tests/test_plan_refsols/tpch_q8.txt | 39 +++++++------- tests/test_plan_refsols/tpch_q9.txt | 27 +++++----- tests/test_plan_refsols/triple_partition.txt | 47 ++++++++-------- .../year_month_nation_orders.txt | 21 ++++---- .../yoy_change_in_num_orders.txt | 5 +- .../defog_broker_adv5_ansi.sql | 2 +- .../defog_broker_adv5_sqlite.sql | 2 +- .../defog_broker_adv7_ansi.sql | 4 +- .../defog_broker_adv7_sqlite.sql | 4 +- .../defog_dealership_gen4_ansi.sql | 8 +-- .../defog_dealership_gen4_sqlite.sql | 10 ++-- .../sqlite_udf_combine_strings_sqlite.sql | 4 +- .../sqlite_udf_covar_pop_sqlite.sql | 14 ++--- .../sqlite_udf_nested_sqlite.sql | 4 +- .../sqlite_udf_percent_epsilon_sqlite.sql | 4 +- .../sqlite_udf_relmin_sqlite.sql | 4 +- ...aph_incident_rate_by_release_year_ansi.sql | 4 +- ...h_incident_rate_by_release_year_sqlite.sql | 4 +- ...technograph_monthly_incident_rate_ansi.sql | 46 ++++++++-------- ...chnograph_monthly_incident_rate_sqlite.sql | 46 ++++++++-------- ...tive_incident_rate_goldcopperstar_ansi.sql | 10 ++-- ...ve_incident_rate_goldcopperstar_sqlite.sql | 10 ++-- tests/test_sql_refsols/tpch_q11_ansi.sql | 10 ++-- tests/test_sql_refsols/tpch_q11_sqlite.sql | 10 ++-- tests/test_sql_refsols/tpch_q15_ansi.sql | 6 +-- tests/test_sql_refsols/tpch_q15_sqlite.sql | 6 +-- tests/test_sql_refsols/tpch_q7_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q7_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q9_ansi.sql | 4 +- tests/test_sql_refsols/tpch_q9_sqlite.sql | 4 +- 102 files changed, 779 insertions(+), 817 deletions(-) diff --git 
a/pydough/conversion/agg_split.py b/pydough/conversion/agg_split.py index 9879c37c5..66745f923 100644 --- a/pydough/conversion/agg_split.py +++ b/pydough/conversion/agg_split.py @@ -226,7 +226,7 @@ def transpose_aggregate_join( # Derive which columns are used as aggregate keys by # the input. - input_keys: dict[str, ColumnReference] = {} + input_keys: dict[str, RelationalExpression] = {} for ref in side_keys: input_keys[ref.name] = ref.with_input(None) for agg_key in node.keys.values(): diff --git a/pydough/conversion/column_bubbler.py b/pydough/conversion/column_bubbler.py index d5d0bc131..6f5b68017 100644 --- a/pydough/conversion/column_bubbler.py +++ b/pydough/conversion/column_bubbler.py @@ -188,16 +188,19 @@ def run_column_bubbling( # For aggregate, do the same as projection but run separately for # keys and aggregations. new_input, input_mapping = run_column_bubbling(node.input, corr_remap) - new_keys: dict[str, ColumnReference] = {} + new_keys: dict[str, RelationalExpression] = {} new_aggs: dict[str, CallExpression] = {} for name, key_expr in node.keys.items(): new_expr = apply_substitution(key_expr, input_mapping, corr_remap) - assert isinstance(new_expr, ColumnReference) new_ref = ColumnReference(name, key_expr.data_type) if new_expr in aliases: remapping[new_ref] = aliases[new_expr] else: - if new_expr.name != name and new_expr.name not in used_names: + if ( + isinstance(new_expr, ColumnReference) + and new_expr.name != name + and new_expr.name not in used_names + ): used_names.add(new_expr.name) alt_ref = ColumnReference(new_expr.name, new_expr.data_type) remapping[new_ref] = alt_ref diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index b126550ad..4e353c914 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -8,6 +8,8 @@ from pydough.relational import ( + Aggregate, + CallExpression, ColumnReference, ExpressionSortInfo, Filter, @@ -229,6 +231,55 @@ def 
pull_project_into_limit(node: Limit) -> None: ] +def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: + """ + TODO + """ + if not isinstance(node.input, Project): + return node + + project: Project = node.input + + finder: ColumnReferenceFinder = ColumnReferenceFinder() + finder.reset() + for key_expr in node.aggregations.values(): + key_expr.accept(finder) + agg_cols: set[ColumnReference] = finder.get_column_references() + agg_names: set[str] = {col.name for col in agg_cols} + finder.reset() + for agg_expr in node.keys.values(): + agg_expr.accept(finder) + key_cols: set[ColumnReference] = finder.get_column_references() + key_names: set[str] = {col.name for col in key_cols} + + transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + substitutions: dict[RelationalExpression, RelationalExpression] = {} + new_expr: RelationalExpression + for name, expr in project.columns.items(): + new_expr = apply_substitution(expr, transfer_substitutions, {}) + if (not contains_window(new_expr)) and ( + (name in agg_names) != (name in key_names) + ): + ref_expr: ColumnReference = ColumnReference(name, expr.data_type) + substitutions[ref_expr] = new_expr + new_keys: dict[str, RelationalExpression] = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.keys.items() + } + new_aggs: dict[str, CallExpression] = {} + for name, expr in node.aggregations.items(): + new_expr = apply_substitution(expr, substitutions, {}) + assert isinstance(new_expr, CallExpression) + new_aggs[name] = new_expr + return Aggregate( + input=node.input, + keys=new_keys, + aggregations=new_aggs, + ) + + def pullup_projections(node: RelationalNode) -> RelationalNode: """ TODO @@ -249,5 +300,7 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: case Limit(): pull_project_into_limit(node) return pull_non_columns(node) + case Aggregate(): + return pull_project_into_aggregate(node) case _: return node diff --git 
a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 66754ba96..ac67d9852 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -588,7 +588,7 @@ def apply_aggregations( ConnectionType.NO_MATCH_AGGREGATION, ) out_columns: dict[HybridExpr, ColumnReference] = {} - keys: dict[str, ColumnReference] = {} + keys: dict[str, RelationalExpression] = {} aggregations: dict[str, CallExpression] = {} used_names: set[str] = set() # First, propagate all key columns into the output, and add them to diff --git a/pydough/relational/relational_nodes/aggregate.py b/pydough/relational/relational_nodes/aggregate.py index 94cf63bb8..8fe953577 100644 --- a/pydough/relational/relational_nodes/aggregate.py +++ b/pydough/relational/relational_nodes/aggregate.py @@ -6,7 +6,6 @@ from pydough.relational.relational_expressions import ( CallExpression, - ColumnReference, RelationalExpression, ) @@ -24,7 +23,7 @@ class Aggregate(SingleRelational): def __init__( self, input: RelationalNode, - keys: dict[str, ColumnReference], + keys: dict[str, RelationalExpression], aggregations: dict[str, CallExpression], ) -> None: total_cols: dict[str, RelationalExpression] = {**keys, **aggregations} @@ -32,14 +31,14 @@ def __init__( "Keys and aggregations must have unique names" ) super().__init__(input, total_cols) - self._keys: dict[str, ColumnReference] = keys + self._keys: dict[str, RelationalExpression] = keys self._aggregations: dict[str, CallExpression] = aggregations assert all(agg.is_aggregation for agg in aggregations.values()), ( "All functions used in aggregations must be aggregation functions" ) @property - def keys(self) -> dict[str, ColumnReference]: + def keys(self) -> dict[str, RelationalExpression]: """ The keys for the aggregation operation. 
""" @@ -78,11 +77,8 @@ def node_copy( keys = {} aggregations = {} for key, val in columns.items(): - if isinstance(val, ColumnReference): - keys[key] = val - else: - assert isinstance(val, CallExpression), ( - "All columns must be references or functions" - ) + if isinstance(val, CallExpression) and val.op.is_aggregation: aggregations[key] = val + else: + keys[key] = val return Aggregate(inputs[0], keys, aggregations) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt b/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt index cee7d4ebd..e59cef716 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_basic.txt @@ -1,4 +1,3 @@ ROOT(columns=[('year', year), ('month', month), ('total_orders', total_orders)], orderings=[]) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'total_orders': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'total_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index e4eb7a12f..91f0efac3 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -1,15 +1,13 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[]) JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': 
YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 8279c92ac..3c30882d8 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -1,15 +1,13 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), ('total_orders', DEFAULT_TO(agg_1, 0:numeric))], orderings=[]) JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t1.n_rows, 'month': t0.month, 'n_rows': t0.n_rows, 'year': t0.year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 
'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_partition.txt b/tests/test_plan_refsols/agg_partition.txt index 9532d9962..9b0bc75da 100644 --- a/tests/test_plan_refsols/agg_partition.txt +++ b/tests/test_plan_refsols/agg_partition.txt @@ -1,5 +1,4 @@ ROOT(columns=[('best_year', best_year)], orderings=[]) AGGREGATE(keys={}, aggregations={'best_year': MAX(n_orders)}) - AGGREGATE(keys={'year': year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git 
a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 263763bd7..4efebacf2 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,8 +1,7 @@ ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(ratio)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'ratio': l_quantity / ps_availqty}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index 6ab7d55c8..3e68d185b 100644 --- 
a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,6 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(expr_1)}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index f83dbaec8..e77367f86 100644 --- a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -7,15 +7,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & 
t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 2be73d9d4..af9436cf0 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) 
JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 
'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 
'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 0945b2982..cf4d7e4f7 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'l_quantity': l_quantity, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'revenue': l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 
'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': 
ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index 7bc4311aa..c08e7d33f 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -4,6 +4,5 @@ ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_ AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(expr_1), 'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'c_nationkey': c_nationkey, 'expr_1': LARGEST(c_acctbal, 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + 
AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(LARGEST(c_acctbal, 0:numeric)), 'sum_expr_1': SUM(LARGEST(c_acctbal, 0:numeric))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index 94e733398..e00d11d91 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,9 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0)}) - PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': 
c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index a2ecdb1b4..e57b541ed 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -21,24 +21,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index e5ee9d7a2..a2d4305ca 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -12,24 +12,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', DEFAUL SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == 
t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git 
a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index d544d0cf7..f28e37a55 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -9,24 +9,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': 
n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index b6ea23a97..4df2fe150 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -21,24 +21,23 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 
'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(revenue)}) - PROJECT(columns={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey, 'revenue': l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': 
ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 4d1bb2447..6d74fea0f 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) - PROJECT(columns={'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric), 'n_suppliers': SUM(n_suppliers)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 953b939f0..e9ba7c35d 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -2,20 +2,18 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': 
t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': 
t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 6676f3f68..1bb9d040a 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -2,25 +2,24 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': 
t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows, 'sum_n_rows_2': sum_n_rows_2}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': 
t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index ba7632a6d..bf725b8e1 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,9 
+1,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index dd8ca64e5..a30d8068c 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_sum_n_rows)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) - PROJECT(columns={'agg_1': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': 
COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 848a95bb5..d4b65fe8d 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) - PROJECT(columns={'agg_2': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'n_suppliers': n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - 
AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(1:numeric)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index cda098921..c777a3423 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -2,20 +2,18 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_T JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 
'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - PROJECT(columns={'agg_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18_0': sum_sum_expr_18_0, 'sum_sum_n_rows': sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'expr_18_0': 1:numeric, 'n_rows': n_rows, 's_nationkey': s_nationkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': 
t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index feffdfb1a..93bd2de6c 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -7,11 +7,10 @@ ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 6d7af3f9c..264641ef8 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -10,11 +10,10 @@ ROOT(columns=[('n', n_rows)], orderings=[]) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(expr_1), 'sum_p_retailprice': SUM(p_retailprice)}) - PROJECT(columns={'expr_1': IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric), 'p_retailprice': p_retailprice, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': 
ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 1217865d6..0afce8592 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -3,11 +3,10 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orde FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': 
MONTH(o_orderdate), 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) + FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) + PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index e7c7cde56..6c7d2bd5a 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,20 +1,19 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name_0': ANYTHING(n_name)}) - PROJECT(columns={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_selected_purchases': 1:numeric}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(1:numeric), 'nation_name_0': ANYTHING(n_name)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index ac9583af3..9bca98b3e 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -2,19 +2,18 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 
'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': 
o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 3f6839aaf..f83a5a536 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -2,17 +2,16 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(agg_0)}) - PROJECT(columns={'agg_0': 1:numeric, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 9f4248442..2e633f4f9 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,20 +1,19 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(region_name), 'n_rows': COUNT()}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'region_name': LOWER(r_name)}) - FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 
'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) + FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': 
r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=s_acctbal > avg_supp_acctbal, columns={'n_nationkey': n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 
's_acctbal': t1.s_acctbal}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index c868fedf4..a4cd83a96 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,17 +1,16 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(revenue), 'median_rev': MEDIAN(revenue), 'nation_name': ANYTHING(n_name)}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'revenue': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(l_extendedprice * 1:numeric - l_discount), 'median_rev': MEDIAN(l_extendedprice * 1:numeric - l_discount), 'nation_name': ANYTHING(n_name)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=MONTH(o_orderdate) == 1:numeric & YEAR(o_orderdate) == 1996:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_tax < 0.05:numeric & l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index 21bb4c0e6..a1e3920bd 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -5,16 +5,15 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': 
ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': year}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - PROJECT(columns={'agg_0': 1:numeric, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'sum_l_extendedprice': sum_l_extendedprice, 'year': YEAR(o_orderdate)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 
'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 97aba3b36..c85560614 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -8,7 +8,6 @@ ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, m 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(r)}) - PROJECT(columns={'l_orderkey': l_orderkey, 'r': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_shipmode == 'AIR':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) diff --git a/tests/test_plan_refsols/double_partition.txt b/tests/test_plan_refsols/double_partition.txt index 28d3238b9..4a8f45c91 100644 --- a/tests/test_plan_refsols/double_partition.txt +++ b/tests/test_plan_refsols/double_partition.txt @@ -1,5 +1,4 @@ ROOT(columns=[('year', year), ('best_month', best_month)], orderings=[]) AGGREGATE(keys={'year': year}, aggregations={'best_month': MAX(n_orders)}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}) - 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index a39171a35..0e76d9871 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,25 +1,23 @@ ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(anything_s_name):asc_first]) JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': DEFAULT_TO(n_rows, 0:numeric) > 0:numeric, 's_name': s_name}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) - AGGREGATE(keys={'s_name': s_name, 
'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(is_intra_season)}) - PROJECT(columns={'is_intra_season': name_9 == s_name, 's_name': s_name}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) - SCAN(table=SEASONS, columns={'s_month1': s_month1, 
's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) + AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) + 
SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) + SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) + SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/global_acctbal_breakdown.txt b/tests/test_plan_refsols/global_acctbal_breakdown.txt index 0625bd292..849cf5736 100644 --- a/tests/test_plan_refsols/global_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/global_acctbal_breakdown.txt @@ -1,4 +1,3 @@ ROOT(columns=[('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[]) - AGGREGATE(keys={}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 
'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - PROJECT(columns={'c_acctbal': c_acctbal, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) diff --git a/tests/test_plan_refsols/highest_priority_per_year.txt b/tests/test_plan_refsols/highest_priority_per_year.txt index 2ea818a88..68046f37a 100644 --- a/tests/test_plan_refsols/highest_priority_per_year.txt +++ b/tests/test_plan_refsols/highest_priority_per_year.txt @@ -1,6 +1,5 @@ ROOT(columns=[('order_year', order_year), ('highest_priority', o_orderpriority), ('priority_pct', priority_pct)], orderings=[(order_year):asc_first]) FILTER(condition=RANKING(args=[], partition=[order_year], order=[(priority_pct):desc_first]) == 1:numeric, columns={'o_orderpriority': o_orderpriority, 'order_year': order_year, 'priority_pct': priority_pct}) PROJECT(columns={'o_orderpriority': o_orderpriority, 'order_year': order_year, 'priority_pct': 100.0:numeric * n_orders / RELSUM(args=[n_orders], partition=[order_year], order=[])}) - AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'o_orderpriority': o_orderpriority, 'order_year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'order_year': YEAR(o_orderdate)}, 
aggregations={'n_orders': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index d438e6e1e..333521c0c 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -3,13 +3,10 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (m JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(month_total_spent)}) - PROJECT(columns={'month_total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric), 'year': year}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(DEFAULT_TO(sum_o_totalprice, 0:numeric))}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 4de6c527a..977b01033 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -4,6 +4,5 @@ ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', n_red_acctbal), ('n_bla SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - PROJECT(columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/odate_and_rdate_avggap.txt b/tests/test_plan_refsols/odate_and_rdate_avggap.txt index fef1cca7d..a556f2e22 100644 --- a/tests/test_plan_refsols/odate_and_rdate_avggap.txt +++ b/tests/test_plan_refsols/odate_and_rdate_avggap.txt @@ -1,7 +1,6 @@ ROOT(columns=[('avg_gap', avg_gap)], orderings=[]) - AGGREGATE(keys={}, aggregations={'avg_gap': AVG(day_gap)}) - PROJECT(columns={'day_gap': DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) - FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={}, aggregations={'avg_gap': AVG(DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate)))}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) + FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index 1dd3998d9..d58e9d39e 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -1,8 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(non_negative_acctbal), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(negative_acctbal), 'n_black_acctbal': COUNT(non_negative_acctbal), 'n_red_acctbal': COUNT(negative_acctbal)}) - PROJECT(columns={'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey, 'negative_acctbal': KEEP_IF(c_acctbal, c_acctbal < 0:numeric), 'non_negative_acctbal': 
KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/simple_var_std_with_nulls.txt b/tests/test_plan_refsols/simple_var_std_with_nulls.txt index 85f3089c8..05e709ed1 100644 --- a/tests/test_plan_refsols/simple_var_std_with_nulls.txt +++ b/tests/test_plan_refsols/simple_var_std_with_nulls.txt @@ -1,5 +1,4 @@ ROOT(columns=[('var_samp_0_nnull', var_samp_0_nnull), ('var_samp_1_nnull', var_samp_1_nnull), ('var_samp_2_nnull', var_samp_2_nnull), ('var_pop_0_nnull', var_pop_0_nnull), ('var_pop_1_nnull', var_pop_1_nnull), ('var_pop_2_nnull', var_pop_2_nnull), ('std_samp_0_nnull', std_samp_0_nnull), ('std_samp_1_nnull', std_samp_1_nnull), ('std_samp_2_nnull', std_samp_2_nnull), ('std_pop_0_nnull', std_pop_0_nnull), ('std_pop_1_nnull', std_pop_1_nnull), ('std_pop_2_nnull', std_pop_2_nnull)], orderings=[]) - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 
'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VARIANCE(key_0), 'var_pop_1_nnull': POPULATION_VARIANCE(key_1), 'var_pop_2_nnull': POPULATION_VARIANCE(key_2), 'var_samp_0_nnull': SAMPLE_VARIANCE(key_0), 'var_samp_1_nnull': SAMPLE_VARIANCE(key_1), 'var_samp_2_nnull': SAMPLE_VARIANCE(key_2)}) - PROJECT(columns={'key_0': KEEP_IF(c_acctbal, c_custkey > 3:numeric), 'key_1': KEEP_IF(c_acctbal, c_custkey > 2:numeric), 'key_2': KEEP_IF(c_acctbal, c_custkey > 1:numeric)}) - FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_pop_1_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_pop_2_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'std_samp_0_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_samp_1_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_samp_2_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_pop_0_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_pop_1_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_pop_2_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_samp_0_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_samp_1_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_samp_2_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric))}) + FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt index 7669d2e75..a0c333d7e 100644 --- a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt +++ b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt @@ -1,14 +1,11 @@ ROOT(columns=[('s1', combine_strings_r_name), ('s2', agg_1), ('s3', agg_2), ('s4', agg_3)], orderings=[]) JOIN(condition=True:bool, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.agg_3, 'combine_strings_r_name': t0.combine_strings_r_name}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.agg_2, 'combine_strings_r_name': t0.combine_strings_r_name}) - AGGREGATE(keys={}, aggregations={'agg_1': COMBINE_STRINGS(n, ', ':string), 'combine_strings_r_name': COMBINE_STRINGS(r_name)}) - PROJECT(columns={'n': KEEP_IF(r_name, r_name != 'EUROPE':string), 'r_name': r_name}) - SCAN(table=tpch.REGION, columns={'r_name': r_name}) - AGGREGATE(keys={}, aggregations={'agg_2': COMBINE_STRINGS(expr_4, '':string)}) - PROJECT(columns={'expr_4': SLICE(n_name, None:unknown, 1:numeric, None:unknown)}) - SCAN(table=tpch.NATION, columns={'n_name': n_name}) - AGGREGATE(keys={}, aggregations={'agg_3': COMBINE_STRINGS(expr_5, ' <=> ':string)}) - PROJECT(columns={'expr_5': SLICE(o_orderpriority, 2:numeric, None:unknown, None:unknown)}) - AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={}, aggregations={'agg_1': COMBINE_STRINGS(KEEP_IF(r_name, r_name != 'EUROPE':string), ', ':string), 'combine_strings_r_name': COMBINE_STRINGS(r_name)}) + SCAN(table=tpch.REGION, columns={'r_name': r_name}) + 
AGGREGATE(keys={}, aggregations={'agg_2': COMBINE_STRINGS(SLICE(n_name, None:unknown, 1:numeric, None:unknown), '':string)}) + SCAN(table=tpch.NATION, columns={'n_name': n_name}) + AGGREGATE(keys={}, aggregations={'agg_3': COMBINE_STRINGS(SLICE(o_orderpriority, 2:numeric, None:unknown, None:unknown), ' <=> ':string)}) + AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 03ef1ee24..8dbd1e7f3 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,12 +1,11 @@ ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 3:numeric))], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, expr_1)}) - PROJECT(columns={'c_acctbal': c_acctbal, 'expr_1': o_totalprice / 1000000.0:numeric, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, o_totalprice / 1000000.0:numeric)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index 08ff9065b..5986f6668 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -1,9 +1,8 @@ ROOT(columns=[('p', ROUND(percentage_expr_1, 
2:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(expr_1)}) - PROJECT(columns={'expr_1': DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool)}) - FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) + FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) + PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt index f102a46f0..30719771b 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt @@ -1,6 +1,5 @@ ROOT(columns=[('pct_e1', ROUND(percentage_expr_5, 4:numeric)), ('pct_e10', ROUND(percentage_expr_6, 4:numeric)), ('pct_e100', ROUND(percentage_expr_7, 4:numeric)), ('pct_e1000', ROUND(percentage_expr_8, 4:numeric)), ('pct_e10000', ROUND(percentage_expr_9, 4:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(expr_5), 'percentage_expr_6': PERCENTAGE(expr_6), 'percentage_expr_7': PERCENTAGE(expr_7), 'percentage_expr_8': PERCENTAGE(expr_8), 'percentage_expr_9': PERCENTAGE(expr_9)}) - PROJECT(columns={'expr_5': EPSILON(o_totalprice, global_avg, 1:numeric), 'expr_6': EPSILON(o_totalprice, global_avg, 10:numeric), 'expr_7': EPSILON(o_totalprice, global_avg, 100:numeric), 'expr_8': EPSILON(o_totalprice, global_avg, 1000:numeric), 'expr_9': EPSILON(o_totalprice, global_avg, 10000:numeric)}) - PROJECT(columns={'global_avg': RELAVG(args=[o_totalprice], partition=[], order=[]), 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1:numeric)), 
'percentage_expr_6': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10:numeric)), 'percentage_expr_7': PERCENTAGE(EPSILON(o_totalprice, global_avg, 100:numeric)), 'percentage_expr_8': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1000:numeric)), 'percentage_expr_9': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10000:numeric))}) + PROJECT(columns={'global_avg': RELAVG(args=[o_totalprice], partition=[], order=[]), 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index b9b7d165d..c0b287617 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -2,13 +2,11 @@ ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2 JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(expr_2)}) - PROJECT(columns={'expr_2': POSITIVE(c_acctbal), 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 
'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(expr_3)}) - PROJECT(columns={'expr_3': POSITIVE(s_acctbal), 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(POSITIVE(c_acctbal))}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(POSITIVE(s_acctbal))}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_relmin.txt b/tests/test_plan_refsols/sqlite_udf_relmin.txt index a4606bb58..82015e622 100644 --- a/tests/test_plan_refsols/sqlite_udf_relmin.txt +++ b/tests/test_plan_refsols/sqlite_udf_relmin.txt @@ -1,5 +1,4 @@ ROOT(columns=[('month', month), ('n_orders', n_rows), ('m1', RELMIN(args=[n_rows], partition=[], order=[])), ('m2', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], cumulative=True)), ('m3', RELMIN(args=[n_rows], partition=[], order=[(month):asc_last], frame=(-1, 1)))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': month}, 
aggregations={'n_rows': COUNT()}) - PROJECT(columns={'month': MONTH(o_orderdate)}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'month': MONTH(o_orderdate)}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 995700439..df78044d3 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,14 +1,13 @@ ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) - AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'co_name': co_name, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_name': pr_name}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 
'de_product_id': t1.de_product_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) - FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) - SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) + AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': 
in_device_id, 'in_error_id': in_error_id}) + FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) + SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 89fe7388c..0c76e958a 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,12 +1,11 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) 
- SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index 79a611d7c..86f829343 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,15 +1,13 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 
'pr_release': t1.pr_release}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) + AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - AGGREGATE(keys={'release_year': release_year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'release_year': YEAR(pr_release)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt 
b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index 5914783da..fdf768b85 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,9 +1,8 @@ ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) - AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'n_incidents': DEFAULT_TO(n_rows, 0:numeric), 'pr_brand': pr_brand}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt 
b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 8aa3a87db..26d7d259d 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,6 +1,6 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': month, 'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'month': MONTH(ca_dt), 'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': year}) + AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1, 'year': year}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows, 'year': t0.year}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 46de1e87a..19a48c5d6 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,11 +1,10 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.pr_id == t1.de_product_id, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_incidents)}) - PROJECT(columns={'de_product_id': de_product_id, 'n_incidents': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id}) + AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt 
b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 8eac1f4c8..f51c8a594 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -4,8 +4,8 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_4': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 4c754cbe5..dd5c32202 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,8 +1,8 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), 
('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_expr_3': SUM(n_rows_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'n_rows': n_rows, 'n_rows_1': n_rows_1, 'year': YEAR(ca_dt)}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) + PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/tpch_q1.txt b/tests/test_plan_refsols/tpch_q1.txt index 14e38c38f..7588aed5a 100644 --- a/tests/test_plan_refsols/tpch_q1.txt +++ b/tests/test_plan_refsols/tpch_q1.txt @@ -1,5 +1,4 @@ ROOT(columns=[('L_RETURNFLAG', l_returnflag), ('L_LINESTATUS', l_linestatus), ('SUM_QTY', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('SUM_BASE_PRICE', DEFAULT_TO(sum_l_extendedprice, 0:numeric)), ('SUM_DISC_PRICE', DEFAULT_TO(sum_expr_9, 0:numeric)), ('SUM_CHARGE', 
DEFAULT_TO(sum_expr_8, 0:numeric)), ('AVG_QTY', avg_l_quantity), ('AVG_PRICE', avg_l_extendedprice), ('AVG_DISC', avg_l_discount), ('COUNT_ORDER', n_rows)], orderings=[(l_returnflag):asc_first, (l_linestatus):asc_first]) - AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(expr_8), 'sum_expr_9': SUM(expr_9), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) - PROJECT(columns={'expr_8': l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax, 'expr_9': l_extendedprice * 1:numeric - l_discount, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag}) - FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) + AGGREGATE(keys={'l_linestatus': l_linestatus, 'l_returnflag': l_returnflag}, aggregations={'avg_l_discount': AVG(l_discount), 'avg_l_extendedprice': AVG(l_extendedprice), 'avg_l_quantity': AVG(l_quantity), 'n_rows': COUNT(), 'sum_expr_8': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric + l_tax), 'sum_expr_9': SUM(l_extendedprice * 1:numeric - l_discount), 'sum_l_extendedprice': SUM(l_extendedprice), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=l_shipdate <= datetime.date(1998, 12, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 
'l_returnflag': l_returnflag, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_linestatus': l_linestatus, 'l_quantity': l_quantity, 'l_returnflag': l_returnflag, 'l_shipdate': l_shipdate, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index cc56c28d9..578e13359 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -2,11 +2,10 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_ JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': 
o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_returnflag': l_returnflag}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 633b50afb..8b4fb4c9e 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -2,19 +2,17 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE): FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': 
t0.sum_metric}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(metric)}) - PROJECT(columns={'metric': ps_supplycost * ps_availqty}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': ps_supplycost * ps_availqty, 'ps_partkey': ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, 
columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index 7192a4e23..e12415ca9 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,7 +1,6 @@ ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', DEFAULT_TO(sum_is_high_priority, 0:numeric)), ('LOW_LINE_COUNT', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(l_shipmode):asc_first]) - AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(expr_2), 'sum_is_high_priority': SUM(is_high_priority)}) - PROJECT(columns={'expr_2': NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown])), 'is_high_priority': 
ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]), 'l_shipmode': l_shipmode}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) - FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))), 'sum_is_high_priority': SUM(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index ee9fe24e8..fd7b629fc 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,8 +1,7 @@ ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], 
orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) - AGGREGATE(keys={'num_non_special_orders': num_non_special_orders}, aggregations={'CUSTDIST': COUNT()}) - PROJECT(columns={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) + AGGREGATE(keys={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}, aggregations={'CUSTDIST': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_comment': o_comment, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q14.txt b/tests/test_plan_refsols/tpch_q14.txt index 672f4aaa9..ea3385773 100644 --- a/tests/test_plan_refsols/tpch_q14.txt +++ b/tests/test_plan_refsols/tpch_q14.txt @@ -1,7 +1,6 @@ ROOT(columns=[('PROMO_REVENUE', 100.0:numeric * DEFAULT_TO(sum_promo_value, 0:numeric) / DEFAULT_TO(sum_value, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_promo_value': SUM(promo_value), 'sum_value': SUM(value)}) - PROJECT(columns={'promo_value': IFF(STARTSWITH(p_type, 'PROMO':string), l_extendedprice * 1:numeric - l_discount, 0:numeric), 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) - FILTER(condition=MONTH(l_shipdate) == 9:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={}, aggregations={'sum_promo_value': SUM(IFF(STARTSWITH(p_type, 'PROMO':string), l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) + FILTER(condition=MONTH(l_shipdate) == 9:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index ffb308cdf..b42afa376 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,16 +1,13 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 
's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'max_revenue': MAX(total_revenue)}) - PROJECT(columns={'total_revenue': DEFAULT_TO(sum_expr_2, 0:numeric)}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) - SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(expr_2)}) - PROJECT(columns={'expr_2': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={}, aggregations={'max_revenue': MAX(DEFAULT_TO(sum_expr_2, 0:numeric))}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_phone': s_phone, 
's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(expr_3)}) - PROJECT(columns={'expr_3': l_extendedprice * 1:numeric - l_discount, 'l_suppkey': l_suppkey}) - FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_3': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index a31db3ead..6a4b3eccd 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,8 +1,7 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount}) - FILTER(condition=MONOTONIC(1:numeric, p_size, 5:numeric) & MONOTONIC(1:numeric, l_quantity, 11:numeric) & ISIN(p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & p_brand == 'Brand#12':string | MONOTONIC(1:numeric, p_size, 10:numeric) & MONOTONIC(10:numeric, l_quantity, 20:numeric) & ISIN(p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & p_brand == 'Brand#23':string | MONOTONIC(1:numeric, p_size, 15:numeric) & MONOTONIC(20:numeric, l_quantity, 
30:numeric) & ISIN(p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & p_brand == 'Brand#34':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'p_brand': t1.p_brand, 'p_container': t1.p_container, 'p_size': t1.p_size}) - FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) - SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) + FILTER(condition=MONOTONIC(1:numeric, p_size, 5:numeric) & MONOTONIC(1:numeric, l_quantity, 11:numeric) & ISIN(p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & p_brand == 'Brand#12':string | MONOTONIC(1:numeric, p_size, 10:numeric) & MONOTONIC(10:numeric, l_quantity, 20:numeric) & ISIN(p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & p_brand == 'Brand#23':string | MONOTONIC(1:numeric, p_size, 15:numeric) & MONOTONIC(20:numeric, l_quantity, 30:numeric) & ISIN(p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & p_brand == 'Brand#34':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 
'l_quantity': t0.l_quantity, 'p_brand': t1.p_brand, 'p_container': t1.p_container, 'p_size': t1.p_size}) + FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) + SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index f6c53f4e2..fa8154ac2 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -1,11 +1,10 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(expr_1)}) - PROJECT(columns={'expr_1': l_extendedprice * 1:numeric - l_discount, 'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) - FILTER(condition=o_orderdate < 
datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) - FILTER(condition=l_shipdate > datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) + FILTER(condition=l_shipdate > 
datetime.date(1995, 3, 15):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index e8f41b9b7..44c860096 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,18 +1,17 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'value': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q6.txt b/tests/test_plan_refsols/tpch_q6.txt index 8181a5a2f..57562829d 100644 --- a/tests/test_plan_refsols/tpch_q6.txt +++ b/tests/test_plan_refsols/tpch_q6.txt @@ -1,5 +1,4 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_amt, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_amt': SUM(amt)}) - PROJECT(columns={'amt': l_extendedprice * l_discount}) - FILTER(condition=l_discount <= 0.07:numeric & l_quantity < 24:numeric & l_shipdate < datetime.date(1995, 1, 1):datetime & l_discount >= 0.05:numeric & l_shipdate >= datetime.date(1994, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': 
l_extendedprice}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={}, aggregations={'sum_amt': SUM(l_extendedprice * l_discount)}) + FILTER(condition=l_discount <= 0.07:numeric & l_quantity < 24:numeric & l_shipdate < datetime.date(1995, 1, 1):datetime & l_discount >= 0.05:numeric & l_shipdate >= datetime.date(1994, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index e721ef7c6..8be0f5377 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,6 +1,6 @@ ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) - AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': l_year, 'n_name': n_name}, aggregations={'sum_volume': SUM(volume)}) - PROJECT(columns={'cust_nation': name_8, 'l_year': YEAR(l_shipdate), 'n_name': n_name, 'volume': l_extendedprice * 1:numeric - l_discount}) + AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': YEAR(l_shipdate), 'n_name': n_name}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) + PROJECT(columns={'cust_nation': name_8, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name}) FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) JOIN(condition=t0.l_orderkey == 
t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index 4f13bff60..e19874147 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -1,21 +1,20 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0:numeric) / DEFAULT_TO(sum_volume, 0:numeric))], orderings=[]) - AGGREGATE(keys={'O_YEAR': O_YEAR}, aggregations={'sum_brazil_volume': SUM(brazil_volume), 'sum_volume': SUM(volume)}) - PROJECT(columns={'O_YEAR': YEAR(o_orderdate), 'brazil_volume': IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric), 'volume': l_extendedprice * 1:numeric - l_discount}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - 
FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) - FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': 
t0.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) + FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': 
t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index 634b9ee27..b82a25527 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,15 +1,14 @@ ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last], limit=10:numeric) - AGGREGATE(keys={'n_name': n_name, 'o_year': o_year}, aggregations={'sum_value': SUM(value)}) - PROJECT(columns={'n_name': n_name, 'o_year': YEAR(o_orderdate), 'value': l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + AGGREGATE(keys={'n_name': n_name, 'o_year': YEAR(o_orderdate)}, aggregations={'sum_value': SUM(l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': 
t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index 7193ed6f9..a5ed33abe 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -1,25 +1,24 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[(supp_region):asc_first]) - AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(percentage)}) - PROJECT(columns={'percentage': 100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric), 'supp_region': supp_region}) - AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) - AGGREGATE(keys={'p_type': p_type, 
'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) - FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric))}) + AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) + AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) + 
FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index 2628b6b63..b91e4cd3c 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,12 +1,11 @@ 
ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last], limit=5:numeric) - AGGREGATE(keys={'n_name': n_name, 'order_month': order_month, 'order_year': order_year}, aggregations={'n_orders': COUNT()}) - PROJECT(columns={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) - FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/yoy_change_in_num_orders.txt b/tests/test_plan_refsols/yoy_change_in_num_orders.txt index 8a2b5f26c..e4d7df518 100644 --- a/tests/test_plan_refsols/yoy_change_in_num_orders.txt +++ b/tests/test_plan_refsols/yoy_change_in_num_orders.txt @@ -1,4 +1,3 @@ ROOT(columns=[('year', year), ('current_year_orders', n_rows), ('pct_change', 100.0:numeric * n_rows - PREV(args=[n_rows], partition=[], order=[(year):asc_last]) / PREV(args=[n_rows], partition=[], order=[(year):asc_last]))], orderings=[(year):asc_first]) - AGGREGATE(keys={'year': year}, aggregations={'n_rows': COUNT()}) - PROJECT(columns={'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index ab77ecbff..48ac2f401 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -3,7 +3,6 @@ WITH _s0 AS ( COUNT(sbdpclose) AS count_sbdpclose, MAX(sbdphigh) AS max_high, MIN(sbdplow) AS min_low, - SUM(sbdpclose) AS 
sum_sbdpclose, CONCAT_WS( '-', EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), @@ -15,6 +14,7 @@ WITH _s0 AS ( )) END ) AS month, + SUM(sbdpclose) AS sum_sbdpclose, sbdptickerid FROM main.sbdailyprice GROUP BY diff --git a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index e74219954..8a07d126c 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -3,7 +3,6 @@ WITH _s0 AS ( COUNT(sbdpclose) AS count_sbdpclose, MAX(sbdphigh) AS max_high, MIN(sbdplow) AS min_low, - SUM(sbdpclose) AS sum_sbdpclose, CONCAT_WS( '-', CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), @@ -15,6 +14,7 @@ WITH _s0 AS ( )) END ) AS month, + SUM(sbdpclose) AS sum_sbdpclose, sbdptickerid FROM main.sbdailyprice GROUP BY diff --git a/tests/test_sql_refsols/defog_broker_adv7_ansi.sql b/tests/test_sql_refsols/defog_broker_adv7_ansi.sql index 7c09391bf..32d6c5ab1 100644 --- a/tests/test_sql_refsols/defog_broker_adv7_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv7_ansi.sql @@ -1,6 +1,5 @@ WITH _s2 AS ( SELECT - COUNT(*) AS n_rows, CONCAT_WS( '-', EXTRACT(YEAR FROM CAST(sbcustjoindate AS DATETIME)), @@ -11,7 +10,8 @@ WITH _s2 AS ( 2 * -1 )) END - ) AS month + ) AS month, + COUNT(*) AS n_rows FROM main.sbcustomer WHERE sbcustjoindate < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) diff --git a/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql index cf9b01eb3..ad71b6d6a 100644 --- a/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv7_sqlite.sql @@ -1,6 +1,5 @@ WITH _s2 AS ( SELECT - COUNT(*) AS n_rows, CONCAT_WS( '-', CAST(STRFTIME('%Y', sbcustjoindate) AS INTEGER), @@ -11,7 +10,8 @@ WITH _s2 AS ( 2 * -1 )) END - ) AS month + ) AS month, + COUNT(*) AS n_rows FROM main.sbcustomer WHERE sbcustjoindate < DATE('now', 'start of month') diff --git 
a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index 2873935fa..cafd78875 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -1,14 +1,14 @@ WITH _s0 AS ( SELECT + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, SUM(sale_price) AS sum_sale_price, - customer_id, - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter + customer_id FROM main.sales WHERE EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - customer_id, - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)), + customer_id ), _t2 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index a5e9a790c..d9d271f6e 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -1,7 +1,5 @@ WITH _s0 AS ( SELECT - SUM(sale_price) AS sum_sale_price, - customer_id, DATE( sale_date, 'start of month', @@ -10,12 +8,13 @@ WITH _s0 AS ( CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ) AS quarter + ) AS quarter, + SUM(sale_price) AS sum_sale_price, + customer_id FROM main.sales WHERE CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 GROUP BY - customer_id, DATE( sale_date, 'start of month', @@ -24,7 +23,8 @@ WITH _s0 AS ( CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ) + ), + customer_id ), _t2 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, diff --git a/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql index a91e49158..60956d7c9 100644 --- a/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql +++ 
b/tests/test_sql_refsols/sqlite_udf_combine_strings_sqlite.sql @@ -7,7 +7,7 @@ WITH _s0 AS ( SELECT GROUP_CONCAT(SUBSTRING(n_name, 1, 1), '') AS agg_2 FROM tpch.nation -), _t5 AS ( +), _t2 AS ( SELECT DISTINCT o_orderpriority FROM tpch.orders @@ -16,7 +16,7 @@ WITH _s0 AS ( ), _s3 AS ( SELECT GROUP_CONCAT(SUBSTRING(o_orderpriority, 3), ' <=> ') AS agg_3 - FROM _t5 + FROM _t2 ) SELECT _s0.combine_strings_r_name AS s1, diff --git a/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql index bc9653a8a..769eda523 100644 --- a/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_covar_pop_sqlite.sql @@ -1,24 +1,16 @@ WITH _s5 AS ( SELECT CAST(( - SUM(customer.c_acctbal * ( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - )) - CAST(SUM(customer.c_acctbal) * SUM(( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - )) AS REAL) / SUM( + SUM(CAST(customer.c_acctbal * orders.o_totalprice AS REAL) / 1000000.0) - CAST(SUM(customer.c_acctbal) * SUM(CAST(orders.o_totalprice AS REAL) / 1000000.0) AS REAL) / SUM( CASE - WHEN NOT ( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - ) IS NULL + WHEN NOT CAST(orders.o_totalprice AS REAL) / 1000000.0 IS NULL AND NOT customer.c_acctbal IS NULL THEN 1 END ) ) AS REAL) / SUM( CASE - WHEN NOT ( - CAST(orders.o_totalprice AS REAL) / 1000000.0 - ) IS NULL + WHEN NOT CAST(orders.o_totalprice AS REAL) / 1000000.0 IS NULL AND NOT customer.c_acctbal IS NULL THEN 1 END diff --git a/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql index 400533afd..2bc509c65 100644 --- a/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_nested_sqlite.sql @@ -6,7 +6,7 @@ WITH _s1 AS ( FROM tpch.orders GROUP BY o_custkey -), _t3 AS ( +), _t2 AS ( SELECT MIN(customer.c_acctbal) OVER () AS min_bal, customer.c_acctbal, @@ -37,6 +37,6 @@ SELECT ) AS REAL) / COUNT(*), 2 ) AS p 
-FROM _t3 +FROM _t2 WHERE n_rows > 0 diff --git a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql index 1ae607753..6375ec3a9 100644 --- a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t2 AS ( +WITH _t1 AS ( SELECT AVG(o_totalprice) OVER () AS global_avg, o_totalprice @@ -37,4 +37,4 @@ SELECT ) AS REAL) / COUNT(*), 4 ) AS pct_e10000 -FROM _t2 +FROM _t1 diff --git a/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql index 7811d9d12..d32c59dde 100644 --- a/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_relmin_sqlite.sql @@ -1,7 +1,7 @@ WITH _t0 AS ( SELECT - COUNT(*) AS n_rows, - CAST(STRFTIME('%m', o_orderdate) AS INTEGER) AS month + CAST(STRFTIME('%m', o_orderdate) AS INTEGER) AS month, + COUNT(*) AS n_rows FROM tpch.orders WHERE CAST(STRFTIME('%Y', o_orderdate) AS INTEGER) = 1994 diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index 7a8882408..c00e952eb 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -12,8 +12,8 @@ WITH _s0 AS ( FROM main.products ), _s6 AS ( SELECT - SUM(_s0.n_rows) AS sum_n_rows, - EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year + EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year, + SUM(_s0.n_rows) AS sum_n_rows FROM _s0 AS _s0 JOIN _s1 AS _s1 ON _s0.de_product_id = _s1.pr_id diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index 5ef83cf6a..df94defe5 100644 --- 
a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -12,8 +12,8 @@ WITH _s0 AS ( FROM main.products ), _s6 AS ( SELECT - SUM(_s0.n_rows) AS sum_n_rows, - CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year + CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year, + SUM(_s0.n_rows) AS sum_n_rows FROM _s0 AS _s0 JOIN _s1 AS _s1 ON _s0.de_product_id = _s1.pr_id diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index a80b170ef..1fd157a20 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t3 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 2021) -), _t7 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t6.ca_dt - FROM _t4 AS _t6 + _t5.ca_dt + FROM _t3 AS _t5 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t6.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t5.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t6.ca_dt + _t5.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t9.ca_dt - FROM _t4 AS _t9 + _t8.ca_dt + FROM _t3 AS _t8 JOIN main.incidents AS incidents - ON _t9.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t10 - ON 
_t10.co_id = devices.de_production_country_id + JOIN _t6 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY - _t9.ca_dt + _t8.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t4 AS _t4 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t4.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t4.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index de7f0b427..b30eff167 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t4 AS ( +WITH _t3 AS ( SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t7 AS ( +), _t6 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t4 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t6.ca_dt - FROM _t4 AS _t6 + _t5.ca_dt + FROM _t3 AS _t5 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t6.ca_dt, '-6 
month') + ON calendar.ca_dt >= DATETIME(_t5.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t7 AS _t7 - ON _t7.co_id = devices.de_production_country_id + JOIN _t6 AS _t6 + ON _t6.co_id = devices.de_production_country_id GROUP BY - _t6.ca_dt + _t5.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t9.ca_dt - FROM _t4 AS _t9 + _t8.ca_dt + FROM _t3 AS _t8 JOIN main.incidents AS incidents - ON _t9.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t10 - ON _t10.co_id = devices.de_production_country_id + JOIN _t6 AS _t9 + ON _t9.co_id = devices.de_production_country_id GROUP BY - _t9.ca_dt + _t8.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t4 AS _t4 +FROM _t3 AS _t3 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t4.ca_dt + ON _s7.ca_dt = _t3.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t4.ca_dt + ON _s15.ca_dt = _t3.ca_dt GROUP BY - CAST(STRFTIME('%m', _t4.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t4.ca_dt) AS INTEGER) + CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql 
b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql index d56b6a7e2..1068846d6 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_ansi.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t7 AS ( +), _t6 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t7 - ON _t7.pr_id = devices.de_product_id + JOIN _t6 AS _t6 + ON _t6.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t7 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t6 AS _t8 + ON _t8.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql index 40f2461ce..7c9c0c3f5 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_sqlite.sql @@ -8,7 +8,7 @@ WITH _s14 AS ( SELECT ca_dt FROM main.calendar -), _t7 AS ( +), _t6 AS ( SELECT pr_id, pr_name @@ -24,8 +24,8 @@ WITH _s14 AS ( ON _s0.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t7 AS _t7 - ON _t7.pr_id = devices.de_product_id + JOIN _t6 AS _t6 + ON _t6.pr_id = devices.de_product_id GROUP BY _s0.ca_dt ), _s13 AS ( @@ -35,8 +35,8 @@ WITH _s14 AS ( FROM _s6 AS _s8 JOIN main.devices AS devices ON 
_s8.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t7 AS _t9 - ON _t9.pr_id = devices.de_product_id + JOIN _t6 AS _t8 + ON _t8.pr_id = devices.de_product_id GROUP BY _s8.ca_dt ), _s15 AS ( diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index f76f36d96..212223900 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -3,7 +3,7 @@ WITH _s0 AS ( s_nationkey, s_suppkey FROM tpch.supplier -), _t4 AS ( +), _t3 AS ( SELECT n_name, n_nationkey @@ -16,8 +16,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t4 - ON _s0.s_nationkey = _t4.n_nationkey + JOIN _t3 AS _t3 + ON _s0.s_nationkey = _t3.n_nationkey ), _s9 AS ( SELECT SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, @@ -25,8 +25,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t7 - ON _s4.s_nationkey = _t7.n_nationkey + JOIN _t3 AS _t5 + ON _s4.s_nationkey = _t5.n_nationkey GROUP BY partsupp.ps_partkey ) diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index f76f36d96..212223900 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -3,7 +3,7 @@ WITH _s0 AS ( s_nationkey, s_suppkey FROM tpch.supplier -), _t4 AS ( +), _t3 AS ( SELECT n_name, n_nationkey @@ -16,8 +16,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t4 - ON _s0.s_nationkey = _t4.n_nationkey + JOIN _t3 AS _t3 + ON _s0.s_nationkey = _t3.n_nationkey ), _s9 AS ( SELECT SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, @@ -25,8 +25,8 @@ WITH _s0 AS ( FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey - JOIN _t4 AS _t7 - ON _s4.s_nationkey = _t7.n_nationkey + JOIN _t3 AS 
_t5 + ON _s4.s_nationkey = _t5.n_nationkey GROUP BY partsupp.ps_partkey ) diff --git a/tests/test_sql_refsols/tpch_q15_ansi.sql b/tests/test_sql_refsols/tpch_q15_ansi.sql index fdfcf8468..9e3993845 100644 --- a/tests/test_sql_refsols/tpch_q15_ansi.sql +++ b/tests/test_sql_refsols/tpch_q15_ansi.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t3 AS ( SELECT l_discount, l_extendedprice, @@ -14,7 +14,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ), _s2 AS ( @@ -29,7 +29,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_3, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ) diff --git a/tests/test_sql_refsols/tpch_q15_sqlite.sql b/tests/test_sql_refsols/tpch_q15_sqlite.sql index 4b6f85ec0..ab90cff0e 100644 --- a/tests/test_sql_refsols/tpch_q15_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q15_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t5 AS ( +WITH _t3 AS ( SELECT l_discount, l_extendedprice, @@ -13,7 +13,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_2, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ), _s2 AS ( @@ -28,7 +28,7 @@ WITH _t5 AS ( 1 - l_discount )) AS sum_expr_3, l_suppkey - FROM _t5 + FROM _t3 GROUP BY l_suppkey ) diff --git a/tests/test_sql_refsols/tpch_q7_ansi.sql b/tests/test_sql_refsols/tpch_q7_ansi.sql index 658b8e12a..8a66ea8ad 100644 --- a/tests/test_sql_refsols/tpch_q7_ansi.sql +++ b/tests/test_sql_refsols/tpch_q7_ansi.sql @@ -38,8 +38,8 @@ JOIN _s9 AS _s9 WHERE EXTRACT(YEAR FROM CAST(lineitem.l_shipdate AS DATETIME)) IN (1995, 1996) GROUP BY - _s9.n_name, EXTRACT(YEAR FROM CAST(lineitem.l_shipdate AS DATETIME)), + _s9.n_name, _s1.n_name ORDER BY _s1.n_name, diff --git a/tests/test_sql_refsols/tpch_q7_sqlite.sql b/tests/test_sql_refsols/tpch_q7_sqlite.sql index 04758cf86..8b8a9ea7f 100644 --- a/tests/test_sql_refsols/tpch_q7_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q7_sqlite.sql @@ -38,8 +38,8 @@ JOIN _s9 AS _s9 WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) IN (1995, 1996) GROUP BY - 
_s9.n_name, CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER), + _s9.n_name, _s1.n_name ORDER BY _s1.n_name, diff --git a/tests/test_sql_refsols/tpch_q9_ansi.sql b/tests/test_sql_refsols/tpch_q9_ansi.sql index 12fe871b0..61f920400 100644 --- a/tests/test_sql_refsols/tpch_q9_ansi.sql +++ b/tests/test_sql_refsols/tpch_q9_ansi.sql @@ -22,8 +22,8 @@ JOIN tpch.partsupp AS partsupp ON lineitem.l_partkey = partsupp.ps_partkey AND lineitem.l_suppkey = partsupp.ps_suppkey GROUP BY - nation.n_name, - EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) + EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)), + nation.n_name ORDER BY nation.n_name, o_year DESC diff --git a/tests/test_sql_refsols/tpch_q9_sqlite.sql b/tests/test_sql_refsols/tpch_q9_sqlite.sql index 37e726db9..0a5405f0d 100644 --- a/tests/test_sql_refsols/tpch_q9_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q9_sqlite.sql @@ -22,8 +22,8 @@ JOIN tpch.partsupp AS partsupp ON lineitem.l_partkey = partsupp.ps_partkey AND lineitem.l_suppkey = partsupp.ps_suppkey GROUP BY - nation.n_name, - CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) + CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER), + nation.n_name ORDER BY nation.n_name, o_year DESC From 08925815c4d320fbfdcc5102bbaf96cce5c7382c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 13 Jul 2025 13:59:55 -0400 Subject: [PATCH 17/97] Added SUM(1)->COUNT() optimization --- pydough/conversion/projection_pullup.py | 26 ++++++++++++++++++- tests/test_plan_refsols/common_prefix_a.txt | 2 +- tests/test_plan_refsols/common_prefix_b.txt | 2 +- tests/test_plan_refsols/common_prefix_c.txt | 4 +-- tests/test_plan_refsols/common_prefix_d.txt | 2 +- tests/test_plan_refsols/common_prefix_e.txt | 2 +- tests/test_plan_refsols/common_prefix_f.txt | 2 +- tests/test_plan_refsols/common_prefix_g.txt | 2 +- tests/test_plan_refsols/common_prefix_h.txt | 4 +-- tests/test_plan_refsols/correl_26.txt | 2 +- tests/test_plan_refsols/correl_27.txt | 2 +- 
tests/test_plan_refsols/correl_28.txt | 2 +- .../count_cust_supplier_nation_combos.txt | 2 +- 13 files changed, 39 insertions(+), 15 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 4e353c914..b4f013a76 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -7,6 +7,7 @@ __all__ = ["pullup_projections"] +import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, CallExpression, @@ -16,6 +17,7 @@ Join, JoinType, Limit, + LiteralExpression, Project, RelationalExpression, RelationalNode, @@ -29,6 +31,7 @@ from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, ) +from pydough.types import NumericType from .merge_projects import merge_adjacent_projects @@ -272,7 +275,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: for name, expr in node.aggregations.items(): new_expr = apply_substitution(expr, substitutions, {}) assert isinstance(new_expr, CallExpression) - new_aggs[name] = new_expr + new_aggs[name] = simplify_agg(new_expr) return Aggregate( input=node.input, keys=new_keys, @@ -280,6 +283,27 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: ) +def simplify_agg(agg: CallExpression) -> CallExpression: + """ + TODO + """ + arg: RelationalExpression + if agg.op == pydop.SUM: + arg = agg.inputs[0] + if ( + isinstance(arg, LiteralExpression) + and isinstance(arg.data_type, NumericType) + and arg.value == 1 + ): + return CallExpression( + op=pydop.COUNT, + return_type=agg.data_type, + inputs=[], + ) + # In all other cases, we just return the aggregation as is. 
+ return agg + + def pullup_projections(node: RelationalNode) -> RelationalNode: """ TODO diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index e00d11d91..ae633b5bc 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 6d74fea0f..83cf63488 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - 
AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(1:numeric), 'n_suppliers': SUM(n_suppliers)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT(), 'n_suppliers': SUM(n_suppliers)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index e9ba7c35d..dcec64b51 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 
'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -12,7 +12,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 1bb9d040a..35f5575ff 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(1:numeric), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index bf725b8e1..da9a367f2 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ 
b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index a30d8068c..b03a4111a 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_sum_n_rows)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(1:numeric), 'sum_sum_n_rows': SUM(sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT(), 'sum_sum_n_rows': SUM(sum_n_rows)}) 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index d4b65fe8d..88aaeef4c 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(1:numeric)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt 
b/tests/test_plan_refsols/common_prefix_h.txt index c777a3423..8540424fb 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -2,7 +2,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_T JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -12,7 +12,7 @@ ROOT(columns=[('name', r_name), ('n_nations', 
n_nations), ('n_orders', DEFAULT_T AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(1:numeric), 'sum_n_rows': SUM(n_rows)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 6c7d2bd5a..79e6735e6 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(1:numeric), 'nation_name_0': ANYTHING(n_name)}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT(), 'nation_name_0': ANYTHING(n_name)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 9bca98b3e..cfbaa563a 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -2,7 +2,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index f83a5a536..0100d1072 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -2,7 +2,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases' JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': SUM(1:numeric)}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index a1e3920bd..f8c87d703 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -5,7 +5,7 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': SUM(1:numeric), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 
'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) From a69d7640a07331d1ae06a894d56b7bf548847b8c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 14 Jul 2025 00:48:05 -0400 Subject: [PATCH 18/97] Cleanup merge projects --- pydough/conversion/merge_projects.py | 69 ++++--------------- .../sqlite_udf_percent_epsilon.txt | 4 +- .../time_threshold_reached.txt | 11 ++- .../global_acctbal_breakdown_sqlite.sql | 7 +- .../nation_acctbal_breakdown_sqlite.sql | 9 ++- .../region_acctbal_breakdown_sqlite.sql | 9 ++- .../sqlite_udf_percent_epsilon_sqlite.sql | 52 ++++++-------- .../time_threshold_reached_ansi.sql | 5 +- .../time_threshold_reached_sqlite.sql | 5 +- 9 files changed, 55 insertions(+), 116 deletions(-) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index cc5eac798..8c400cb86 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -188,46 +188,13 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: # no longer a projection. 
while isinstance(node.input, Project): child_project: Project = node.input - if isinstance(node, RelationalRoot): - # # The columns of the projection can be sucked into the root - # # above it if they are all pass-through/renamings, or if there - # # is no convolution created (only allowed if there are no - # # ordering expressions). - # if all( - # isinstance(expr, ColumnReference) - # for expr in child_project.columns.values() - # ) or ( - # len(node.orderings) == 0 - # and merging_doesnt_create_convolution( - # node.columns, child_project.columns - # ) - # ): - # # Replace all column references in the root's columns with - # # the expressions from the child projection.. - # for idx, (name, expr) in enumerate(node.ordered_columns): - # new_expr = transpose_expression(expr, child_project.columns) - # node.columns[name] = new_expr - # node.ordered_columns[idx] = (name, new_expr) - # # Do the same with the sort expressions. - # for idx, sort_info in enumerate(node.orderings): - # new_expr = transpose_expression( - # sort_info.expr, child_project.columns - # ) - # node.orderings[idx] = ExpressionSortInfo( - # new_expr, sort_info.ascending, sort_info.nulls_first - # ) - # # Delete the child projection from the tree, replacing it - # # with its input. - # node._input = child_project.input - # else: - # # Otherwise, halt the merging process since it is no longer - # # possible to merge the children of this root into it. - # break - # TODO: ADD COMMENTS - if not ( - any(contains_window(expr) for expr in child_project.columns.values()) - and any(contains_window(expr) for expr in node.columns.values()) - ): + # The columns of the projection can be sucked into the parent + # above it unless there is a window function in both. 
+ if not ( + any(contains_window(expr) for expr in child_project.columns.values()) + and any(contains_window(expr) for expr in node.columns.values()) + ): + if isinstance(node, RelationalRoot): # Replace all column references in the root's columns with # the expressions from the child projection. for idx, (name, expr) in enumerate(node.ordered_columns): @@ -245,28 +212,18 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: # Delete the child projection from the tree, replacing it # with its input. node._input = child_project.input - else: - # Otherwise, halt the merging process since it is no longer - # possible to merge the children of this root into it. - break - elif isinstance(node, Project): - # The columns of the projection can be sucked into the - # projection above it if they are all pass-through/renamings - # or if there is no convolution created. - if all( - isinstance(expr, ColumnReference) - for expr in child_project.columns.values() - ) or merging_doesnt_create_convolution(node.columns, child_project.columns): + continue + elif isinstance(node, Project): for name, expr in node.columns.items(): new_expr = transpose_expression(expr, child_project.columns) node.columns[name] = new_expr # Delete the child projection from the tree, replacing it # with its input. node._input = child_project.input - else: - # Otherwise, halt the merging process since it is no longer - # possible to merge the children of this project into it. - break + continue + # Otherwise, halt the merging process since it is no longer + # possible to merge the children of this project into it. + break # Final round: if there is a project on top of a scan, aggregate, filter, # or limit that only does column pruning/renaming, just push it into the # node. 
diff --git a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt index 30719771b..e418e0d86 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_epsilon.txt @@ -1,5 +1,5 @@ ROOT(columns=[('pct_e1', ROUND(percentage_expr_5, 4:numeric)), ('pct_e10', ROUND(percentage_expr_6, 4:numeric)), ('pct_e100', ROUND(percentage_expr_7, 4:numeric)), ('pct_e1000', ROUND(percentage_expr_8, 4:numeric)), ('pct_e10000', ROUND(percentage_expr_9, 4:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1:numeric)), 'percentage_expr_6': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10:numeric)), 'percentage_expr_7': PERCENTAGE(EPSILON(o_totalprice, global_avg, 100:numeric)), 'percentage_expr_8': PERCENTAGE(EPSILON(o_totalprice, global_avg, 1000:numeric)), 'percentage_expr_9': PERCENTAGE(EPSILON(o_totalprice, global_avg, 10000:numeric))}) - PROJECT(columns={'global_avg': RELAVG(args=[o_totalprice], partition=[], order=[]), 'o_totalprice': o_totalprice}) + AGGREGATE(keys={}, aggregations={'percentage_expr_5': PERCENTAGE(expr_5), 'percentage_expr_6': PERCENTAGE(expr_6), 'percentage_expr_7': PERCENTAGE(expr_7), 'percentage_expr_8': PERCENTAGE(expr_8), 'percentage_expr_9': PERCENTAGE(expr_9)}) + PROJECT(columns={'expr_5': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 1:numeric), 'expr_6': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 10:numeric), 'expr_7': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 100:numeric), 'expr_8': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 1000:numeric), 'expr_9': EPSILON(o_totalprice, RELAVG(args=[o_totalprice], partition=[], order=[]), 10000:numeric)}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_totalprice': o_totalprice}) 
SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index 8b94ac8f2..ba32986b7 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,7 +1,6 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) - FILTER(condition=RANKING(args=[], partition=[txn_day], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) - FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) - PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[txn_day], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[txn_day], order=[]), 'sbTxDateTime': sbTxDateTime, 'txn_day': txn_day}) - PROJECT(columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'txn_day': DATETIME(sbTxDateTime, 'start of day':string)}) - FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) - SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) + FILTER(condition=RANKING(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) + FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime}) + PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[]), 'sbTxDateTime': sbTxDateTime}) + 
FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) + SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql index c3cabeb5a..4cfe3d75f 100644 --- a/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/global_acctbal_breakdown_sqlite.sql @@ -39,13 +39,12 @@ WITH _t0 AS ( THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END ELSE NULL END AS expr_7, - CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END AS negative_acctbal, - CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END AS non_negative_acctbal + c_acctbal FROM tpch.customer ) SELECT - COUNT(negative_acctbal) AS n_red_acctbal, - COUNT(non_negative_acctbal) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, AVG(expr_7) AS median_red_acctbal, AVG(expr_5) AS median_black_acctbal, AVG(expr_6) AS median_overall_acctbal diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql index 72c55d250..e53fbbd46 100644 --- a/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_sqlite.sql @@ -39,17 +39,16 @@ WITH _t2 AS ( THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END ELSE NULL END AS expr_7, - c_nationkey, - CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END AS negative_acctbal, - CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END AS non_negative_acctbal + c_acctbal, + c_nationkey FROM tpch.customer ), _s3 AS ( SELECT AVG(expr_5) AS median_black_acctbal, AVG(expr_6) AS median_overall_acctbal, AVG(expr_7) AS median_red_acctbal, - COUNT(non_negative_acctbal) AS 
n_black_acctbal, - COUNT(negative_acctbal) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, c_nationkey FROM _t2 GROUP BY diff --git a/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql b/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql index 4883afc1d..74fde6005 100644 --- a/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql +++ b/tests/test_sql_refsols/region_acctbal_breakdown_sqlite.sql @@ -39,9 +39,8 @@ WITH _t1 AS ( THEN CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END ELSE NULL END AS expr_7, - nation.n_regionkey, - CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END AS negative_acctbal, - CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END AS non_negative_acctbal + customer.c_acctbal, + nation.n_regionkey FROM tpch.nation AS nation JOIN tpch.customer AS customer ON customer.c_nationkey = nation.n_nationkey @@ -50,8 +49,8 @@ WITH _t1 AS ( AVG(expr_5) AS median_black_acctbal, AVG(expr_6) AS median_overall_acctbal, AVG(expr_7) AS median_red_acctbal, - COUNT(non_negative_acctbal) AS n_black_acctbal, - COUNT(negative_acctbal) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, n_regionkey FROM _t1 GROUP BY diff --git a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql index 6375ec3a9..3af0c6f54 100644 --- a/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_percent_epsilon_sqlite.sql @@ -1,40 +1,28 @@ WITH _t1 AS ( SELECT - AVG(o_totalprice) OVER () AS global_avg, - o_totalprice + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 1 AS expr_5, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 10 AS 
expr_6, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 100 AS expr_7, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 1000 AS expr_8, + ABS(AVG(o_totalprice) OVER () - o_totalprice) <= 10000 AS expr_9 FROM tpch.orders WHERE CAST(STRFTIME('%Y', o_orderdate) AS INTEGER) = 1992 ) SELECT - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 1 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e1, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 10 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e10, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 100 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e100, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 1000 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e1000, - ROUND( - CAST(( - 100.0 * SUM(CASE WHEN ABS(global_avg - o_totalprice) <= 10000 THEN 1 END) - ) AS REAL) / COUNT(*), - 4 - ) AS pct_e10000 + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_5 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e1, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_6 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e10, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_7 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e100, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_8 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e1000, + ROUND(CAST(( + 100.0 * SUM(CASE WHEN expr_9 THEN 1 END) + ) AS REAL) / COUNT(*), 4) AS pct_e10000 FROM _t1 diff --git a/tests/test_sql_refsols/time_threshold_reached_ansi.sql b/tests/test_sql_refsols/time_threshold_reached_ansi.sql index 853086170..bb5d82ba1 100644 --- a/tests/test_sql_refsols/time_threshold_reached_ansi.sql +++ b/tests/test_sql_refsols/time_threshold_reached_ansi.sql @@ -3,8 +3,7 @@ WITH _t3 AS ( ( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) / SUM(sbtxshares) OVER 
(PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, - sbtxdatetime, - DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) AS txn_day + sbtxdatetime FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 @@ -15,7 +14,7 @@ WITH _t3 AS ( WHERE pct_of_day >= 50.0 QUALIFY - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day NULLS LAST) = 1 + ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY pct_of_day NULLS LAST) = 1 ) SELECT sbtxdatetime AS date_time diff --git a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql index 5e47efb24..a21717a9c 100644 --- a/tests/test_sql_refsols/time_threshold_reached_sqlite.sql +++ b/tests/test_sql_refsols/time_threshold_reached_sqlite.sql @@ -3,15 +3,14 @@ WITH _t3 AS ( CAST(( 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day') ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) AS REAL) / SUM(sbtxshares) OVER (PARTITION BY DATE(sbtxdatetime, 'start of day')) AS pct_of_day, - sbtxdatetime, - DATE(sbtxdatetime, 'start of day') AS txn_day + sbtxdatetime FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 ), _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (PARTITION BY txn_day ORDER BY pct_of_day) AS _w + ROW_NUMBER() OVER (PARTITION BY DATE(sbtxdatetime, 'start of day') ORDER BY pct_of_day) AS _w FROM _t3 WHERE pct_of_day >= 50.0 From 13d9844430e627cb4082c1882848a9cb6fd529db Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 14 Jul 2025 13:56:21 -0400 Subject: [PATCH 19/97] Added some adjacent aggregaiton merging --- pydough/conversion/projection_pullup.py | 71 +++++++++++++++++++ tests/test_plan_refsols/common_prefix_b.txt | 5 +- tests/test_plan_refsols/common_prefix_c.txt | 11 ++- tests/test_plan_refsols/common_prefix_d.txt | 5 +- tests/test_plan_refsols/common_prefix_f.txt | 5 +- 
tests/test_plan_refsols/common_prefix_g.txt | 5 +- tests/test_plan_refsols/common_prefix_h.txt | 11 ++- tests/test_plan_refsols/correl_14.txt | 29 ++++---- tests/test_plan_refsols/correl_15.txt | 35 +++++---- tests/test_plan_refsols/correl_16.txt | 25 ++++--- tests/test_plan_refsols/correl_18.txt | 19 +++-- .../multi_partition_access_2.txt | 9 +-- .../multi_partition_access_5.txt | 8 +-- .../multi_partition_access_6.txt | 41 +++++------ 14 files changed, 164 insertions(+), 115 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index b4f013a76..1857bf5cd 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -27,6 +27,7 @@ add_input_name, apply_substitution, contains_window, + transpose_expression, ) from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, @@ -304,6 +305,75 @@ def simplify_agg(agg: CallExpression) -> CallExpression: return agg +def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: + """ + TODO + """ + if not isinstance(node.input, Aggregate): + return node + + input_agg: Aggregate = node.input + + top_keys: set[RelationalExpression] = { + transpose_expression(expr, input_agg.columns) for expr in node.keys.values() + } + bottom_keys: set[RelationalExpression] = set(input_agg.keys.values()) + + # print() + # print("Top keys:") + # for key in top_keys: + # print(f" {key.to_string(True)}") + # print("Bottom keys:") + # for key in bottom_keys: + # print(f" {key.to_string(True)}") + + if len(top_keys - bottom_keys) > 0: + return node + + bottom_only_keys: set[RelationalExpression] = bottom_keys - top_keys + + new_keys: dict[str, RelationalExpression] = { + name: transpose_expression(expr, input_agg.columns) + for name, expr in node.keys.items() + } + new_aggs: dict[str, CallExpression] = {} + input_expr: RelationalExpression + for agg_name, agg_expr in node.aggregations.items(): + match agg_expr.op: 
+ case pydop.COUNT if len(agg_expr.inputs) == 0: + if len(bottom_only_keys) == 0: + new_aggs[agg_name] = CallExpression( + op=pydop.ANYTHING, + return_type=agg_expr.data_type, + inputs=[LiteralExpression(1, agg_expr.data_type)], + ) + elif len(bottom_only_keys) == 1: + new_aggs[agg_name] = CallExpression( + op=pydop.NDISTINCT, + return_type=agg_expr.data_type, + inputs=[next(iter(bottom_only_keys))], + ) + else: + return node + case pydop.SUM: + input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) + if isinstance(input_expr, CallExpression) and input_expr.op in ( + pydop.SUM, + pydop.COUNT, + ): + new_aggs[agg_name] = input_expr + else: + return node + case _: + return node + + return Aggregate( + input=input_agg.input, + keys=new_keys, + aggregations=new_aggs, + ) + + def pullup_projections(node: RelationalNode) -> RelationalNode: """ TODO @@ -325,6 +395,7 @@ def pullup_projections(node: RelationalNode) -> RelationalNode: pull_project_into_limit(node) return pull_non_columns(node) case Aggregate(): + node = merge_adjacent_aggregations(node) return pull_project_into_aggregate(node) case _: return node diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 83cf63488..d4bfb2b5b 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -7,6 +7,5 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_cust SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'n_suppliers': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index dcec64b51..c7f3f076b 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -11,9 +11,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 35f5575ff..a9e20fe8d 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ 
b/tests/test_plan_refsols/common_prefix_d.txt @@ -20,6 +20,5 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index b03a4111a..8de55d20c 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -7,6 +7,5 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_ SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 88aaeef4c..35ed92de0 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -7,6 +7,5 @@ 
ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 8540424fb..fc806c3fc 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -11,9 +11,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_T SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 
'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 93bd2de6c..0c0d15dde 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,16 +1,15 @@ ROOT(columns=[('n', n)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n': COUNT()}) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 
1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) + FILTER(condition=p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': 
SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 264641ef8..dda6f11b4 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,19 +1,18 @@ ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) - AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) - FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={}, aggregations={'n_rows': NDISTINCT(s_suppkey)}) + FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < sum_p_retailprice / sum_expr_1, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': 
t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) + FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=p_container == 'LG DRUM':string, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_16.txt b/tests/test_plan_refsols/correl_16.txt index 31ea9a869..0b8509b58 100644 --- a/tests/test_plan_refsols/correl_16.txt +++ b/tests/test_plan_refsols/correl_16.txt @@ -1,14 +1,13 @@ ROOT(columns=[('n', n)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n': COUNT()}) - AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) - FILTER(condition=PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - PROJECT(columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey, 'tile': PERCENTILE(args=[], partition=[], order=[(s_acctbal):asc_last, (s_suppkey):asc_last], n_buckets=10000)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) + FILTER(condition=PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + PROJECT(columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey, 'tile': PERCENTILE(args=[], partition=[], order=[(s_acctbal):asc_last, (s_suppkey):asc_last], n_buckets=10000)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 5f6ca684d..646b9855d 100644 --- 
a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,11 +1,10 @@ ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_n_above_avg': SUM(n_above_avg)}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_above_avg': COUNT()}) - FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) - FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) - AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) + FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) + FILTER(condition=n_rows > 1:numeric, 
columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) + AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 0bcb1f4aa..e42e62646 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -9,8 +9,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, 
type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) @@ -21,8 +20,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) @@ -33,8 +31,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index bca9f137e..6c6ea66d7 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -6,16 +6,14 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': 
sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 5ee7193e5..69f9b27a4 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -3,38 +3,33 @@ 
ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': SUM(n_cust_type_trans)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, 
aggregations={'sum_n_cust_type_trans': COUNT()}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) @@ -44,15 +39,13 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': 
t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) From c4971274679453428fa1096c569dcd82eafcf2a3 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 14 Jul 2025 14:03:40 -0400 Subject: [PATCH 20/97] Added min/min, max/max, anything/anything cases --- pydough/conversion/projection_pullup.py | 9 
+++++++++ tests/test_plan_refsols/multi_partition_access_4.txt | 5 ++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 1857bf5cd..cf1658866 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -364,6 +364,15 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: new_aggs[agg_name] = input_expr else: return node + case pydop.MIN | pydop.MAX | pydop.ANYTHING: + input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) + if ( + isinstance(input_expr, CallExpression) + and input_expr.op == agg_expr.op + ): + new_aggs[agg_name] = input_expr + else: + return node case _: return node diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 01a79cbf9..f64e8d845 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -2,9 +2,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=sbTxShares < cust_max_shares & sbTxShares >= cust_ticker_max_shares, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t0.cust_ticker_max_shares, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(cust_ticker_max_shares)}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': 
MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) From f74fc5c1a2a5b721bf6c2d585411a2344718428c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 11:43:24 -0400 Subject: [PATCH 21/97] Adding more aggregation simplification and comments --- pydough/conversion/projection_pullup.py | 59 ++++++++++++------- .../aggregation_analytics_2.txt | 4 +- .../aggregation_analytics_3.txt | 4 +- tests/test_plan_refsols/correl_26.txt | 6 +- tests/test_plan_refsols/correl_27.txt | 6 +- tests/test_plan_refsols/correl_28.txt | 6 +- tests/test_plan_refsols/correl_30.txt | 4 +- .../epoch_event_gap_per_era.txt | 4 +- .../epoch_intra_season_searches.txt | 6 +- .../epoch_event_gap_per_era_ansi.sql | 2 +- .../epoch_event_gap_per_era_sqlite.sql | 2 +- .../epoch_intra_season_searches_ansi.sql | 10 ++-- .../epoch_intra_season_searches_sqlite.sql | 10 ++-- 13 files changed, 69 insertions(+), 54 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index cf1658866..3ba23982a 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -268,41 +268,64 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: ): ref_expr: ColumnReference = ColumnReference(name, expr.data_type) substitutions[ref_expr] = 
new_expr + new_columns: dict[str, RelationalExpression] = { + name: ColumnReference(name, expr.data_type) for name, expr in node.keys.items() + } new_keys: dict[str, RelationalExpression] = { name: apply_substitution(expr, substitutions, {}) for name, expr in node.keys.items() } new_aggs: dict[str, CallExpression] = {} + out_expr: RelationalExpression + new_agg_expr: CallExpression | None for name, expr in node.aggregations.items(): new_expr = apply_substitution(expr, substitutions, {}) assert isinstance(new_expr, CallExpression) - new_aggs[name] = simplify_agg(new_expr) - return Aggregate( + out_expr, new_agg_expr = simplify_agg(new_keys, new_expr, name) + new_columns[name] = out_expr + if new_agg_expr is not None: + new_aggs[name] = new_agg_expr + agg: Aggregate = Aggregate( input=node.input, keys=new_keys, aggregations=new_aggs, ) + return Project(input=agg, columns=new_columns) -def simplify_agg(agg: CallExpression) -> CallExpression: +def simplify_agg( + keys: dict[str, RelationalExpression], agg: CallExpression, name: str +) -> tuple[RelationalExpression, CallExpression | None]: """ TODO """ + reverse_keys: dict[RelationalExpression, str] = { + expr: name for name, expr in keys.items() + } + out_ref: RelationalExpression = ColumnReference(name, agg.data_type) arg: RelationalExpression - if agg.op == pydop.SUM: + if agg.op in (pydop.SUM, pydop.COUNT) and len(agg.inputs) == 1: arg = agg.inputs[0] - if ( - isinstance(arg, LiteralExpression) - and isinstance(arg.data_type, NumericType) - and arg.value == 1 + if isinstance(arg, LiteralExpression) and isinstance( + arg.data_type, NumericType ): - return CallExpression( - op=pydop.COUNT, - return_type=agg.data_type, - inputs=[], - ) + if (agg.op == pydop.SUM and arg.value == 1) or ( + agg.op == pydop.COUNT and arg.value is not None + ): + return out_ref, CallExpression( + op=pydop.COUNT, + return_type=agg.data_type, + inputs=[], + ) + + # If the aggregation is on a key, we can just return the key. 
+ if agg.op in (pydop.SUM, pydop.MIN, pydop.MAX, pydop.ANYTHING): + arg = agg.inputs[0] + if arg in reverse_keys: + return ColumnReference(reverse_keys[arg], agg.data_type), None + # In all other cases, we just return the aggregation as is. - return agg + return out_ref, agg def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: @@ -319,14 +342,6 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: } bottom_keys: set[RelationalExpression] = set(input_agg.keys.values()) - # print() - # print("Top keys:") - # for key in top_keys: - # print(f" {key.to_string(True)}") - # print("Bottom keys:") - # for key in bottom_keys: - # print(f" {key.to_string(True)}") - if len(top_keys - bottom_keys) > 0: return node diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index af9436cf0..1d8f376f1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) 
JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index cf4d7e4f7..df3d64e66 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_ps_partkey': ANYTHING(ps_partkey), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + 
AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 79e6735e6..4cb7eba80 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': ANYTHING(nation_name_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases, 'nation_name_0': t0.nation_name_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT(), 'nation_name_0': ANYTHING(n_name)}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': SUM(n_selected_purchases), 'nation_name': 
ANYTHING(n_name)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_selected_purchases': t0.n_selected_purchases}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index cfbaa563a..a4c0eb75f 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - 
AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 0100d1072..df5869922 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_selected_purchases', sum_sum_agg_0)], orderings=[(anything_anything_n_name):asc_first]) JOIN(condition=t0.anything_anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_anything_n_name': 
t0.anything_anything_n_name, 'sum_sum_agg_0': t0.sum_sum_agg_0}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) - AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(n_name), 'anything_anything_n_regionkey': ANYTHING(anything_n_regionkey), 'sum_sum_agg_0': SUM(sum_agg_0)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_regionkey': t0.anything_n_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'sum_agg_0': t0.sum_agg_0}) + AGGREGATE(keys={'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey), 'sum_agg_0': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': 
t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 2e633f4f9..11018a2c2 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_nationkey': ANYTHING(n_nationkey), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) FILTER(condition=c_acctbal > avg_cust_acctbal, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'r_name': r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'c_acctbal': t1.c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) diff --git a/tests/test_plan_refsols/epoch_event_gap_per_era.txt 
b/tests/test_plan_refsols/epoch_event_gap_per_era.txt index 8626c8529..da5390394 100644 --- a/tests/test_plan_refsols/epoch_event_gap_per_era.txt +++ b/tests/test_plan_refsols/epoch_event_gap_per_era.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('era_name', era_name), ('avg_event_gap', avg_event_gap)], orderings=[(anything_er_start_year):asc_first]) - AGGREGATE(keys={'er_name': er_name}, aggregations={'anything_er_start_year': ANYTHING(er_start_year), 'avg_event_gap': AVG(day_gap), 'era_name': ANYTHING(er_name)}) +ROOT(columns=[('era_name', er_name), ('avg_event_gap', avg_event_gap)], orderings=[(anything_er_start_year):asc_first]) + AGGREGATE(keys={'er_name': er_name}, aggregations={'anything_er_start_year': ANYTHING(er_start_year), 'avg_event_gap': AVG(day_gap)}) PROJECT(columns={'day_gap': DATEDIFF('days':string, PREV(args=[ev_dt], partition=[er_name], order=[(ev_dt):asc_last]), ev_dt), 'er_name': er_name, 'er_start_year': er_start_year}) JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 0e76d9871..1f8616b5a 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('season_name', anything_s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(anything_s_name):asc_first]) - JOIN(condition=t0.anything_s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': 
t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'anything_s_name': t0.anything_s_name, 'n_rows': t1.n_rows, 'sum_is_intra_season': t1.sum_is_intra_season}) - AGGREGATE(keys={'s_name': s_name}, aggregations={'anything_s_name': ANYTHING(s_name), 'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) +ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) + JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) + AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql b/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql index def99e785..584f057e8 100644 --- a/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql +++ b/tests/test_sql_refsols/epoch_event_gap_per_era_ansi.sql @@ -13,7 +13,7 @@ WITH _t1 AS ( AND eras.er_start_year <= EXTRACT(YEAR FROM CAST(events.ev_dt AS DATETIME)) ) SELECT - ANY_VALUE(er_name) AS era_name, + er_name AS era_name, AVG(day_gap) AS avg_event_gap 
FROM _t1 GROUP BY diff --git a/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql b/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql index 90e9de7f6..bbaebfead 100644 --- a/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql +++ b/tests/test_sql_refsols/epoch_event_gap_per_era_sqlite.sql @@ -16,7 +16,7 @@ WITH _t1 AS ( AND eras.er_start_year <= CAST(STRFTIME('%Y', events.ev_dt) AS INTEGER) ) SELECT - MAX(er_name) AS era_name, + er_name AS era_name, AVG(day_gap) AS avg_event_gap FROM _t1 GROUP BY diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql index 5d79982d0..e10c0c411 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_ansi.sql @@ -34,11 +34,11 @@ WITH _s0 AS ( searches.search_id ), _s16 AS ( SELECT - ANY_VALUE(_s0.s_name) AS anything_s_name, COUNT(*) AS n_rows, SUM(( NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season + )) AS sum_is_intra_season, + _s0.s_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = EXTRACT(MONTH FROM CAST(searches.search_ts AS DATETIME)) @@ -68,7 +68,7 @@ WITH _s0 AS ( _s10.s_name ) SELECT - _s16.anything_s_name AS season_name, + _s16.s_name AS season_name, ROUND(( 100.0 * COALESCE(_s16.sum_is_intra_season, 0) ) / _s16.n_rows, 2) AS pct_season_searches, @@ -77,6 +77,6 @@ SELECT ) / COALESCE(_s17.n_rows, 0), 2) AS pct_event_searches FROM _s16 AS _s16 LEFT JOIN _s17 AS _s17 - ON _s16.anything_s_name = _s17.s_name + ON _s16.s_name = _s17.s_name ORDER BY - _s16.anything_s_name + _s16.s_name diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql index 39f1ed3f2..7e86ea480 100644 --- a/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql +++ b/tests/test_sql_refsols/epoch_intra_season_searches_sqlite.sql @@ -36,11 +36,11 @@ WITH _s0 AS ( 
searches.search_id ), _s16 AS ( SELECT - MAX(_s0.s_name) AS anything_s_name, COUNT(*) AS n_rows, SUM(( NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 - )) AS sum_is_intra_season + )) AS sum_is_intra_season, + _s0.s_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.s_month1 = CAST(STRFTIME('%m', searches.search_ts) AS INTEGER) @@ -72,7 +72,7 @@ WITH _s0 AS ( _s10.s_name ) SELECT - _s16.anything_s_name AS season_name, + _s16.s_name AS season_name, ROUND(CAST(( 100.0 * COALESCE(_s16.sum_is_intra_season, 0) ) AS REAL) / _s16.n_rows, 2) AS pct_season_searches, @@ -84,6 +84,6 @@ SELECT ) AS pct_event_searches FROM _s16 AS _s16 LEFT JOIN _s17 AS _s17 - ON _s16.anything_s_name = _s17.s_name + ON _s16.s_name = _s17.s_name ORDER BY - _s16.anything_s_name + _s16.s_name From d2604287b6edd408699ddd012cc4fc1a788f1554 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 13:36:28 -0400 Subject: [PATCH 22/97] Added more aggregation simplification + tests --- pydough/conversion/projection_pullup.py | 175 ++++++++++++++++-- tests/test_pipeline_defog_custom.py | 123 ++++++++++++ .../agg_simplification_1.txt | 4 + .../agg_simplification_2.txt | 5 + .../aggregation_analytics_2.txt | 25 +-- .../aggregation_analytics_3.txt | 25 +-- .../month_year_sliding_windows.txt | 6 +- ...ograph_battery_failure_rates_anomalies.txt | 4 +- ..._error_rate_sun_set_by_factory_country.txt | 17 +- .../technograph_incident_rate_per_brand.txt | 4 +- .../technograph_most_unreliable_products.txt | 4 +- .../simple_pydough_functions.py | 105 +++++++++++ ...h_battery_failure_rates_anomalies_ansi.sql | 4 +- ...battery_failure_rates_anomalies_sqlite.sql | 4 +- ...r_rate_sun_set_by_factory_country_ansi.sql | 6 +- ...rate_sun_set_by_factory_country_sqlite.sql | 6 +- ...chnograph_incident_rate_per_brand_ansi.sql | 2 +- ...nograph_incident_rate_per_brand_sqlite.sql | 2 +- ...hnograph_most_unreliable_products_ansi.sql | 6 +- ...ograph_most_unreliable_products_sqlite.sql | 6 +- 20 files changed, 456 
insertions(+), 77 deletions(-) create mode 100644 tests/test_plan_refsols/agg_simplification_1.txt create mode 100644 tests/test_plan_refsols/agg_simplification_2.txt diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 3ba23982a..a24f2e6c3 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -32,7 +32,7 @@ from pydough.relational.relational_expressions.column_reference_finder import ( ColumnReferenceFinder, ) -from pydough.types import NumericType +from pydough.types import BooleanType, NumericType from .merge_projects import merge_adjacent_projects @@ -96,7 +96,7 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) - return Project(input=node, columns=new_project_columns) + return merge_adjacent_projects(Project(input=node, columns=new_project_columns)) def pull_project_into_join(node: Join, input_index: int) -> None: @@ -290,7 +290,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: keys=new_keys, aggregations=new_aggs, ) - return Project(input=agg, columns=new_columns) + return merge_adjacent_projects(Project(input=agg, columns=new_columns)) def simplify_agg( @@ -304,25 +304,138 @@ def simplify_agg( } out_ref: RelationalExpression = ColumnReference(name, agg.data_type) arg: RelationalExpression - if agg.op in (pydop.SUM, pydop.COUNT) and len(agg.inputs) == 1: + + zero_expr: RelationalExpression = LiteralExpression(0, agg.data_type) + one_expr: RelationalExpression = LiteralExpression(1, agg.data_type) + count_star: CallExpression = CallExpression( + op=pydop.COUNT, + return_type=NumericType(), + inputs=[], + ) + + # Can optimize SUM, COUNT and NDISTINCT aggregations on literals. 
+ if ( + agg.op in (pydop.SUM, pydop.COUNT, pydop.NDISTINCT) + and len(agg.inputs) == 1 + and isinstance(agg.inputs[0], LiteralExpression) + ): arg = agg.inputs[0] - if isinstance(arg, LiteralExpression) and isinstance( - arg.data_type, NumericType + if agg.op == pydop.SUM and ( + isinstance(arg.data_type, NumericType) or arg.value is None ): - if (agg.op == pydop.SUM and arg.value == 1) or ( - agg.op == pydop.COUNT and arg.value is not None - ): - return out_ref, CallExpression( - op=pydop.COUNT, + # SUM(NULL) -> NULL + if arg.value is None: + return arg, None + + # SUM(0) -> 0 + elif arg.value == 0: + return zero_expr, None + + # SUM(1) -> COUNT(*) + # SUM(n) = COUNT(*) * n + elif arg.value != 1: + out_ref = CallExpression( + op=pydop.MUL, return_type=agg.data_type, - inputs=[], + inputs=[out_ref, LiteralExpression(arg.value, agg.data_type)], ) + return out_ref, count_star - # If the aggregation is on a key, we can just return the key. - if agg.op in (pydop.SUM, pydop.MIN, pydop.MAX, pydop.ANYTHING): + elif agg.op == pydop.COUNT: + # COUNT(NULL) -> 0 + if arg.value is None: + return zero_expr, None + + # COUNT(n) -> COUNT(*) + else: + return out_ref, count_star + + elif agg.op == pydop.NDISTINCT: + # NDISTINCT(NULL) -> 0 + # NDISTINCT(n) -> 1 + return zero_expr if arg.value is None else one_expr, None + + # SUM(DEFAULT_TO(x, 0)) -> DEFAULT_TO(SUM(x), 0) + if ( + agg.op == pydop.SUM + and len(agg.inputs) == 1 + and isinstance(agg.inputs[0], CallExpression) + ): + if ( + agg.inputs[0].op == pydop.DEFAULT_TO + and isinstance(agg.inputs[0].inputs[1], LiteralExpression) + and isinstance(agg.inputs[0].inputs[1].data_type, NumericType) + and agg.inputs[0].inputs[1].value == 0 + ): + return CallExpression( + pydop.DEFAULT_TO, agg.data_type, [out_ref, zero_expr] + ), CallExpression(pydop.SUM, agg.data_type, [agg.inputs[0].inputs[0]]) + + # If the aggregation is on a key, we can just use the key. 
+ if ( + agg.op + in ( + pydop.SUM, + pydop.MIN, + pydop.MAX, + pydop.ANYTHING, + pydop.AVG, + pydop.QUANTILE, + pydop.MEDIAN, + pydop.COUNT, + pydop.NDISTINCT, + ) + and len(agg.inputs) == 1 + ): arg = agg.inputs[0] if arg in reverse_keys: - return ColumnReference(reverse_keys[arg], agg.data_type), None + key_ref: RelationalExpression = ColumnReference( + reverse_keys[arg], agg.data_type + ) + + # COUNT(key) -> COUNT(*) * INTEGER(PRESENT(key)) + if agg.op == pydop.COUNT: + return CallExpression( + pydop.MUL, + agg.data_type, + [ + out_ref, + CallExpression( + pydop.INTEGER, + NumericType(), + [CallExpression(pydop.PRESENT, BooleanType(), [key_ref])], + ), + ], + ), count_star + + # NDISTINCT(key) -> INTEGER(PRESENT(key)) + if agg.op == pydop.NDISTINCT: + return CallExpression( + pydop.INTEGER, + NumericType(), + [CallExpression(pydop.PRESENT, BooleanType(), [key_ref])], + ), None + + # Otherwise, FUNC(key) -> key + return key_ref, None + + # If running a selection aggregation on a literal, can just return the + # input. + if ( + agg.op + in ( + pydop.MIN, + pydop.MAX, + pydop.ANYTHING, + pydop.AVG, + pydop.MEDIAN, + pydop.QUANTILE, + ) + and len(agg.inputs) >= 1 + ): + arg = agg.inputs[0] + if isinstance(arg, LiteralExpression): + return arg, None # In all other cases, we just return the aggregation as is. 
return out_ref, agg @@ -356,41 +469,67 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: for agg_name, agg_expr in node.aggregations.items(): match agg_expr.op: case pydop.COUNT if len(agg_expr.inputs) == 0: + # top_keys: {x, y} + # bottom_keys: {x, y} + # COUNT(*) -> ANYTHING(1) if len(bottom_only_keys) == 0: new_aggs[agg_name] = CallExpression( op=pydop.ANYTHING, return_type=agg_expr.data_type, inputs=[LiteralExpression(1, agg_expr.data_type)], ) + + # top_keys: {x, y} + # bottom_keys: {x, y, z} + # COUNT(*) -> NDISTINCT(z) elif len(bottom_only_keys) == 1: new_aggs[agg_name] = CallExpression( op=pydop.NDISTINCT, return_type=agg_expr.data_type, inputs=[next(iter(bottom_only_keys))], ) + + # Otherwise, the merge fails. else: return node + case pydop.SUM: + # SUM(SUM(x)) -> SUM(x) + # SUM(COUNT(x)) -> COUNT(x) input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) if isinstance(input_expr, CallExpression) and input_expr.op in ( pydop.SUM, pydop.COUNT, ): new_aggs[agg_name] = input_expr + + # Otherwise, the merge fails. else: return node + case pydop.MIN | pydop.MAX | pydop.ANYTHING: + # MIN(MIN(x)) -> MIN(x) + # MIN(ANYTHING(x)) -> MIN(x) + # MAX(MAX(x)) -> MAX(x) + # MAX(ANYTHING(x)) -> MAX(x) + # ANYTHING(ANYTHING(x)) -> ANYTHING(x) input_expr = transpose_expression(agg_expr.inputs[0], input_agg.columns) - if ( - isinstance(input_expr, CallExpression) - and input_expr.op == agg_expr.op + if isinstance(input_expr, CallExpression) and input_expr.op in ( + agg_expr.op, + pydop.ANYTHING, ): new_aggs[agg_name] = input_expr + + # Otherwise, the merge fails. else: return node + + # Otherwise, the merge fails. case _: return node + # If none of the aggregations caused a merge failure, we can return a new + # Aggregate node using the top keys and the merged aggregation calls. 
return Aggregate( input=input_agg.input, keys=new_keys, diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index f2ff13365..450cdc43c 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -37,6 +37,8 @@ bad_rpad_8, ) from tests.test_pydough_functions.simple_pydough_functions import ( + agg_simplification_1, + agg_simplification_2, cumulative_stock_analysis, exponentiation, find, @@ -1419,6 +1421,127 @@ def get_day_of_week( ), id="window_sliding_frame_relsum", ), + pytest.param( + PyDoughPandasTest( + agg_simplification_1, + "Broker", + lambda: pd.DataFrame( + { + "aug_exchange": [None, 4, 6, 8], + "su1": [3, 4, 10, 4], + "su2": [6, 8, 20, 8], + "su3": [-3, -4, -10, -4], + "su4": [-9, -12, -30, -12], + "su5": [0, 0, 0, 0], + "su6": [1.5, 2.0, 5.0, 2.0], + "su7": [0, 0, 0, 0], + "su8": [0, 4, 6, 8], + "co1": [3, 4, 10, 4], + "co2": [3, 4, 10, 4], + "co3": [3, 4, 10, 4], + "co4": [3, 4, 10, 4], + "co5": [3, 4, 10, 4], + "co6": [3, 4, 10, 4], + "co7": [0, 0, 0, 0], + "co8": [0, 4, 10, 4], + "nd1": [1, 1, 1, 1], + "nd2": [1, 1, 1, 1], + "nd3": [1, 1, 1, 1], + "nd4": [1, 1, 1, 1], + "nd5": [1, 1, 1, 1], + "nd6": [1, 1, 1, 1], + "nd7": [0, 0, 0, 0], + "nd8": [0, 1, 1, 1], + "av1": [1, 1, 1, 1], + "av2": [2, 2, 2, 2], + "av3": [-1, -1, -1, -1], + "av4": [-3, -3, -3, -3], + "av5": [0, 0, 0, 0], + "av6": [0.5, 0.5, 0.5, 0.5], + "av7": [None, None, None, None], + "av8": [None, 4, 6, 8], + "mi1": [1, 1, 1, 1], + "mi2": [2, 2, 2, 2], + "mi3": [-1, -1, -1, -1], + "mi4": [-3, -3, -3, -3], + "mi5": [0, 0, 0, 0], + "mi6": [0.5, 0.5, 0.5, 0.5], + "mi7": [None, None, None, None], + "mi8": [None, 4, 6, 8], + "ma1": [1, 1, 1, 1], + "ma2": [2, 2, 2, 2], + "ma3": [-1, -1, -1, -1], + "ma4": [-3, -3, -3, -3], + "ma5": [0, 0, 0, 0], + "ma6": [0.5, 0.5, 0.5, 0.5], + "ma7": [None, None, None, None], + "ma8": [None, 4, 6, 8], + "an1": [1, 1, 1, 1], + "an2": [2, 2, 2, 2], + "an3": [-1, -1, -1, -1], + "an4": [-3, -3, -3, 
-3], + "an5": [0, 0, 0, 0], + "an6": [0.5, 0.5, 0.5, 0.5], + "an7": [None, None, None, None], + "an8": [None, 4, 6, 8], + "me1": [1.0, 1.0, 1.0, 1.0], + "me2": [2.0, 2.0, 2.0, 2.0], + "me3": [-1.0, -1.0, -1.0, -1.0], + "me4": [-3.0, -3.0, -3.0, -3.0], + "me5": [0.0, 0.0, 0.0, 0.0], + "me6": [0.5, 0.5, 0.5, 0.5], + "me7": [None, None, None, None], + "me8": [None, 4.0, 6.0, 8.0], + "qu1": [1, 1, 1, 1], + "qu2": [2, 2, 2, 2], + "qu3": [-1, -1, -1, -1], + "qu4": [-3, -3, -3, -3], + "qu5": [0, 0, 0, 0], + "qu6": [0.5, 0.5, 0.5, 0.5], + "qu7": [None, None, None, None], + "qu8": [None, 4, 6, 8], + } + ), + "agg_simplification_1", + order_sensitive=True, + ), + id="agg_simplification_1", + ), + pytest.param( + PyDoughPandasTest( + agg_simplification_2, + "Broker", + lambda: pd.DataFrame( + { + "state": ["CA", "FL", "NJ", "NY", "TX"], + "a1": [2, 1, 1, 1, 1], + "a2": [7, 3, 3, 4, 3], + "a3": [1, 0, 0, 3, 0], + "a4": [636307, 99303, 26403, 40008, 225000], + "a5": [ + "555-123-4567", + "555-370-2648", + "555-246-1357", + "555-135-7902", + "555-246-8135", + ], + "a6": [ + "555-864-2319", + "555-864-2319", + "555-987-6543", + "555-987-6543", + "555-753-1904", + ], + "a7": ["ca", "fl", "nj", "ny", "tx"], + "a8": ["ca", "fl", "nj", "ny", "tx"], + "a9": ["ca", "fl", "nj", "ny", "tx"], + } + ), + "agg_simplification_2", + order_sensitive=True, + ), + id="agg_simplification_2", + ), ], ) def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt new file mode 100644 index 000000000..014c1dbdf --- /dev/null +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', DEFAULT_TO(count_one, 0:numeric)), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', DEFAULT_TO(0:numeric, 
0:numeric)), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) + AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) + PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}) + SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/agg_simplification_2.txt 
b/tests/test_plan_refsols/agg_simplification_2.txt new file mode 100644 index 000000000..917655ad9 --- /dev/null +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -0,0 +1,5 @@ +ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', max_anys), ('a9', anything_anys)], orderings=[(sbCustState):asc_first]) + AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'anything_anys': ANYTHING(anys), 'max_anys': MAX(anys), 'max_max_sbCustPhone': MAX(max_sbCustPhone), 'min_anys': MIN(anys), 'min_min_sbCustPhone': MIN(min_sbCustPhone), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_nj': SUM(nj), 'sum_sz': SUM(sum_expr_11)}) + PROJECT(columns={'anys': anything_expr_9, 'max_sbCustPhone': max_sbCustPhone, 'min_sbCustPhone': min_sbCustPhone, 'n_rows': n_rows, 'nj': count_expr_10, 'sbCustState': sbCustState, 'sum_expr_11': sum_expr_11}) + AGGREGATE(keys={'sbCustCity': sbCustCity, 'sbCustState': sbCustState}, aggregations={'anything_expr_9': ANYTHING(LOWER(sbCustState)), 'count_expr_10': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'max_sbCustPhone': MAX(sbCustPhone), 'min_sbCustPhone': MIN(sbCustPhone), 'n_rows': COUNT(), 'sum_expr_11': SUM(INTEGER(sbCustPostalCode))}) + SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 1d8f376f1..43a1d3b81 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,14 +1,15 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', 
ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': 
l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 
'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index df3d64e66..2e100b381 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,14 +1,15 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': 
t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 333521c0c..cc7064f09 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,9 +1,9 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': 
month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(DEFAULT_TO(sum_o_totalprice, 0:numeric))}) + FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(sum_o_totalprice)}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index df78044d3..050999da6 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], 
orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) - AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) +ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) + AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index 0c76e958a..314981554 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,11 +1,12 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': 
t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) - AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) - AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) + PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_incidents, 0:numeric)}) + AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + 
AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index fdf768b85..a64d4f50c 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) - AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) +ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) + AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 19a48c5d6..d27e1aa5b 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], 
limit=5:numeric) +ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(DEFAULT_TO(n_rows, 0:numeric))}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'de_product_id': t0.de_product_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 40729db86..7ed47dc5e 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -2983,3 +2983,108 @@ def quantile_function_test_4(): orders_99_percent=QUANTILE(selected_orders.total_price, 0.99), orders_max=QUANTILE(selected_orders.total_price, 1.0), ) + + +def agg_simplification_1(): + # TODO + kwargs = {} + args = [ + tickers.one, + tickers.two, + tickers.negative_one, + tickers.negative_three, + tickers.zero, + tickers.half, + tickers.null, + 
tickers.aug_exchange, + ] + functions = [ + ("su", SUM), + ("co", COUNT), + ("nd", NDISTINCT), + ("av", AVG), + ("mi", MIN), + ("ma", MAX), + ("an", ANYTHING), + ("me", MEDIAN), + ] + for prefix, func in functions: + for idx, arg in enumerate(args): + kwargs[f"{prefix}{idx + 1}"] = func(arg) + for idx, arg in enumerate(args): + kwargs[f"qu{idx + 1}"] = QUANTILE(arg, (idx + 1) / 10) + return ( + tickers.CALCULATE( + aug_exchange=LENGTH(KEEP_IF(exchange, exchange != "NYSE Arca")) + ) + .CALCULATE( + one=1, + two=2, + negative_one=-1, + negative_three=-3, + zero=0, + half=0.5, + null=None, + ) + .PARTITION(name="exchanges", by=aug_exchange) + .CALCULATE( + aug_exchange, + **kwargs, + ) + .ORDER_BY(aug_exchange.ASC()) + ) + + +""" +SELECT + LENGTH(NULLIF(sbTickerExchange, 'NYSE Arca')) AS aug_exchange, + COUNT(*) +from main.sbticker +GROUP BY 1 +ORDER BY 1 +; + +|3 +NASDAQ|10 +NYSE|4 +Vanguard|4 +[None, 4, 6, 8] +[3, 10, 4, 4] +""" + + +def agg_simplification_2(): + # TODO + return ( + customers.PARTITION(name="cities", by=(city, state)) + .CALCULATE( + n=COUNT(customers), + nj=COUNT(KEEP_IF(customers.name, STARTSWITH(LOWER(customers.name), "j"))), + sz=SUM(INTEGER(customers.postal_code)), + minp=MIN(customers.phone), + maxp=MAX(customers.phone), + anys=ANYTHING(LOWER(customers.state)), + ) + .PARTITION(name="states", by=state) + .CALCULATE( + state, + a1=COUNT(cities), + a2=SUM(cities.n), + a3=SUM(cities.nj), + a4=SUM(cities.sz), + a5=MIN(cities.minp), + a6=MAX(cities.maxp), + a7=MIN(cities.anys), + a8=MAX(cities.anys), + a9=ANYTHING(cities.anys), + ) + .ORDER_BY(state.ASC()) + ) + + +""" +SELECT sbCustState, sbCustCity, COUNT(*), COUNT(CASE WHEN LOWER(sbCustName) LIKE 'j%' THEN 1 END), SUM(CAST(sbCustPostalCode AS INTEGER)), MIN(sbCustPhone), MAX(sbCustPhone), MAX(LOWER(sbCustState)) +FROM main.sbcustomer +GROUP BY 1, 2 +ORDER BY 1, 2; +""" diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql 
b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql index 954197d6d..e665da566 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) / COUNT(*), 2) DESC, + ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql index 5462388f5..8c6123891 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(CAST(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(CAST(COALESCE(SUM(COALESCE(_s7.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) DESC, + ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git 
a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql index 99187ac49..103672272 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_ansi.sql @@ -7,9 +7,9 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, - devices.de_production_country_id + COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, + devices.de_production_country_id, + COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql index 3180a6be4..a6b037b5e 100644 --- a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_sqlite.sql @@ -7,9 +7,9 @@ WITH _s3 AS ( in_device_id ), _s5 AS ( SELECT - COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, - devices.de_production_country_id + COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, + devices.de_production_country_id, + COUNT(*) AS n_rows FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql index 96b348eee..ad09ee111 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS 
brand, - ROUND(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql index bf9decd5e..864caeee4 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS brand, - ROUND(CAST(COALESCE(SUM(COALESCE(_s3.n_rows, 0)), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index 276282dbe..572bff6d9 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, + SUM(_s3.n_rows) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) AS ir + ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) DESC + ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 
2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 60fd910bb..4f0d45267 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ), _s5 AS ( SELECT COUNT(*) AS n_rows, - SUM(COALESCE(_s3.n_rows, 0)) AS sum_n_incidents, + SUM(_s3.n_rows) AS sum_n_incidents, devices.de_product_id FROM main.devices AS devices JOIN main.products AS products @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) AS ir + ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) DESC + ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) DESC LIMIT 5 From 856e1a9e12539e73fbc5bb23cc16a1f938c628df Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 13:51:56 -0400 Subject: [PATCH 23/97] Adjusting parameters of optimization --- pydough/conversion/relational_converter.py | 10 +- .../agg_simplification_2.txt | 8 +- tests/test_plan_refsols/aggregate_semi.txt | 4 +- .../aggregation_analytics_2.txt | 25 +- .../aggregation_analytics_3.txt | 25 +- tests/test_plan_refsols/common_prefix_c.txt | 31 ++- tests/test_plan_refsols/common_prefix_d.txt | 35 ++- tests/test_plan_refsols/common_prefix_h.txt | 33 ++- tests/test_plan_refsols/common_prefix_o.txt | 4 +- .../month_year_sliding_windows.txt | 11 +- ...ple_simple_aggregations_multiple_calcs.txt | 8 +- .../test_plan_refsols/supplier_best_part.txt | 4 +- .../technograph_monthly_incident_rate.txt | 49 ++-- 
...umulative_incident_rate_goldcopperstar.txt | 33 ++- ..._year_cumulative_incident_rate_overall.txt | 21 +- tests/test_plan_refsols/tpch_q7.txt | 25 +- .../various_aggfuncs_simple.txt | 4 +- .../simple_pydough_functions.py | 42 +-- tests/test_pydough_to_sql.py | 14 + .../agg_simplification_1_ansi.sql | 87 +++++++ .../agg_simplification_1_sqlite.sql | 239 ++++++++++++++++++ .../agg_simplification_2_ansi.sql | 16 ++ .../agg_simplification_2_sqlite.sql | 16 ++ .../defog_dealership_basic5_ansi.sql | 6 +- .../defog_dealership_basic5_sqlite.sql | 6 +- 25 files changed, 550 insertions(+), 206 deletions(-) create mode 100644 tests/test_sql_refsols/agg_simplification_1_ansi.sql create mode 100644 tests/test_sql_refsols/agg_simplification_1_sqlite.sql create mode 100644 tests/test_sql_refsols/agg_simplification_2_ansi.sql create mode 100644 tests/test_sql_refsols/agg_simplification_2_sqlite.sql diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index ac67d9852..09ed9802b 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1443,12 +1443,10 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: run projection pullup. - # print() - # print(root.to_tree_string()) - root = confirm_root(pullup_projections(root)) - # print() - # print(root.to_tree_string()) + # Step 8: run projection pullup followed by column pruning 2x. 
+ for _ in range(2): + root = confirm_root(pullup_projections(root)) + root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run filter pushdown root._input = push_filters(root.input, set()) diff --git a/tests/test_plan_refsols/agg_simplification_2.txt b/tests/test_plan_refsols/agg_simplification_2.txt index 917655ad9..c1121462b 100644 --- a/tests/test_plan_refsols/agg_simplification_2.txt +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -1,5 +1,3 @@ -ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', max_anys), ('a9', anything_anys)], orderings=[(sbCustState):asc_first]) - AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'anything_anys': ANYTHING(anys), 'max_anys': MAX(anys), 'max_max_sbCustPhone': MAX(max_sbCustPhone), 'min_anys': MIN(anys), 'min_min_sbCustPhone': MIN(min_sbCustPhone), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_nj': SUM(nj), 'sum_sz': SUM(sum_expr_11)}) - PROJECT(columns={'anys': anything_expr_9, 'max_sbCustPhone': max_sbCustPhone, 'min_sbCustPhone': min_sbCustPhone, 'n_rows': n_rows, 'nj': count_expr_10, 'sbCustState': sbCustState, 'sum_expr_11': sum_expr_11}) - AGGREGATE(keys={'sbCustCity': sbCustCity, 'sbCustState': sbCustState}, aggregations={'anything_expr_9': ANYTHING(LOWER(sbCustState)), 'count_expr_10': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'max_sbCustPhone': MAX(sbCustPhone), 'min_sbCustPhone': MIN(sbCustPhone), 'n_rows': COUNT(), 'sum_expr_11': SUM(INTEGER(sbCustPostalCode))}) - SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) +ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 
0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', min_anys), ('a9', min_anys)], orderings=[(sbCustState):asc_first]) + AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_max_sbCustPhone': MAX(sbCustPhone), 'min_anys': ANYTHING(LOWER(sbCustState)), 'min_min_sbCustPhone': MIN(sbCustPhone), 'n_rows': NDISTINCT(sbCustCity), 'sum_n_rows': COUNT(), 'sum_nj': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'sum_sz': SUM(INTEGER(sbCustPostalCode))}) + SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 8b74b522a..7e7f2a981 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': 
ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 43a1d3b81..1d8f376f1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 2e100b381..df3d64e66 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,15 +1,14 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - 
PROJECT(columns={'anything_ps_partkey': ps_partkey, 'sum_l_quantity': sum_l_quantity, 'sum_revenue': sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) + FILTER(condition=STARTSWITH(p_container, 'MED':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': 
l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index c7f3f076b..3b51f1f96 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,18 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_sum_sum_expr_18_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': 
t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows_1': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index a9e20fe8d..498795765 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,24 +1,23 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 
'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_10': sum_sum_expr_10, 'sum_sum_expr_7': sum_sum_expr_7, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(sum_n_rows_2), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_2': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(agg_29), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index fc806c3fc..2001b4675 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', 
sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0_1)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_sum_expr_18_0_1': t1.sum_sum_sum_expr_18_0_1, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_0, 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 
'sum_n_rows': t1.sum_n_rows}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0_1': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows_1': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': 
COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': COUNT(), 'sum_sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 808be5ed1..056f09af0 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': n_small_parts, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_small_parts': sum_sum_agg_5, 
'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index cc7064f09..41ba7be61 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,12 +1,11 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) 
FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': year}, aggregations={'sum_month_total_spent': SUM(sum_o_totalprice)}) - AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 
'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 2186c653c..afbab2010 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': 
t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal_1': AVG(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal_1': AVG(s_acctbal), 'max_s_acctbal_1': MAX(s_acctbal), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 5dbeb3b2f..f0f4f0fb3 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -7,9 +7,9 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 26d7d259d..0bb678bcf 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,28 +1,27 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, 
aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1, 'year': year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 
'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) + FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) + PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) - FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) - 
SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + SCAN(table=main.DEVICES, columns={'de_production_country_id': de_production_country_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) + FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) + SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index f51c8a594..613013975 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -4,24 +4,23 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': 
pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) - AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) - FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) 
AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) + FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git 
a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index dd5c32202..45394da84 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,16 +1,15 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(n_rows_1_1), 'sum_n_rows': SUM(n_rows)}) - PROJECT(columns={'ca_dt': ca_dt, 'n_rows': n_rows, 'n_rows_1_1': n_rows_1}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, 
aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index 8be0f5377..a98bd9b69 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,16 +1,15 @@ ROOT(columns=[('SUPP_NATION', n_name), ('CUST_NATION', cust_nation), ('L_YEAR', l_year), 
('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(n_name):asc_first, (cust_nation):asc_first, (l_year):asc_first]) AGGREGATE(keys={'cust_nation': cust_nation, 'l_year': YEAR(l_shipdate), 'n_name': n_name}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - PROJECT(columns={'cust_nation': name_8, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name}) - FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name, 'name_8': name_8}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) - FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'FRANCE':string & name_8 == 'GERMANY':string | n_name == 'GERMANY':string & name_8 == 'FRANCE':string, columns={'cust_nation': name_8, 'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_shipdate': l_shipdate, 'n_name': n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t0.n_name, 'name_8': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index 44a91ea4a..49fed6fa7 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal': COUNT(c_acctbal), 'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + 
AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal_1': COUNT(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'min_c_acctbal_1': MIN(c_acctbal), 'n_rows_1': COUNT(), 'sum_c_acctbal_1': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 7ed47dc5e..3df9ca79a 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -2986,7 +2986,11 @@ def quantile_function_test_4(): def agg_simplification_1(): - # TODO + # Partition the tickers on the value + # `LENGTH(KEEP_IF(exchange, exchange != "NYSE Arca"))`, then for every + # combination of 1, 2, -1, -3, 0, 0.5, null, and the partition key, call + # the aggregation functions SUM, COUNT, NDISTINCT, AVG, MIN, MAX, + # ANYTHING, MEDIAN, and QUANTILE on each of the inputs. kwargs = {} args = [ tickers.one, @@ -3035,26 +3039,16 @@ def agg_simplification_1(): ) -""" -SELECT - LENGTH(NULLIF(sbTickerExchange, 'NYSE Arca')) AS aug_exchange, - COUNT(*) -from main.sbticker -GROUP BY 1 -ORDER BY 1 -; - -|3 -NASDAQ|10 -NYSE|4 -Vanguard|4 -[None, 4, 6, 8] -[3, 10, 4, 4] -""" - - def agg_simplification_2(): - # TODO + # Partition the customers by city/state then by state to compute the + # following aggregations per-state: + # 1. Number of cities per state + # 2. Total number of customers per state + # 3. Total postal code sum per state + # 4. Total number of customers with names starting with "j" per state + # 5. Minimum phone number per state + # 6. 
Maximum phone number per state + # 7-9: Convoluted ways to pass around the lowercase state name return ( customers.PARTITION(name="cities", by=(city, state)) .CALCULATE( @@ -3080,11 +3074,3 @@ def agg_simplification_2(): ) .ORDER_BY(state.ASC()) ) - - -""" -SELECT sbCustState, sbCustCity, COUNT(*), COUNT(CASE WHEN LOWER(sbCustName) LIKE 'j%' THEN 1 END), SUM(CAST(sbCustPostalCode AS INTEGER)), MIN(sbCustPhone), MAX(sbCustPhone), MAX(LOWER(sbCustState)) -FROM main.sbcustomer -GROUP BY 1, 2 -ORDER BY 1, 2; -""" diff --git a/tests/test_pydough_to_sql.py b/tests/test_pydough_to_sql.py index 00530b6a4..7af97012f 100644 --- a/tests/test_pydough_to_sql.py +++ b/tests/test_pydough_to_sql.py @@ -26,6 +26,8 @@ window_functions, ) from tests.test_pydough_functions.simple_pydough_functions import ( + agg_simplification_1, + agg_simplification_2, cumulative_stock_analysis, datediff, datetime_sampler, @@ -236,6 +238,18 @@ def test_pydough_to_sql_tpch( "Broker", id="week_offset", ), + pytest.param( + agg_simplification_1, + "agg_simplification_1", + "Broker", + id="agg_simplification_1", + ), + pytest.param( + agg_simplification_2, + "agg_simplification_2", + "Broker", + id="agg_simplification_2", + ), pytest.param( cumulative_stock_analysis, "cumulative_stock_analysis", diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql b/tests/test_sql_refsols/agg_simplification_1_ansi.sql new file mode 100644 index 000000000..6c346edc0 --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -0,0 +1,87 @@ +SELECT + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, + COALESCE(COUNT(*), 0) AS su1, + COALESCE(COUNT(*) * 2, 0) AS su2, + COALESCE(COUNT(*) * -1, 0) AS su3, + COALESCE(COUNT(*) * -3, 0) AS su4, + 0 AS su5, + COALESCE(COUNT(*) * 0.5, 0) AS su6, + COALESCE(NULL, 0) AS su7, + COALESCE( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), + 0 + ) AS su8, + COUNT(*) AS 
co1, + COUNT(*) AS co2, + COUNT(*) AS co3, + COUNT(*) AS co4, + COUNT(*) AS co5, + COUNT(*) AS co6, + 0 AS co7, + COUNT(*) * CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS BIGINT) AS co8, + 1 AS nd1, + 1 AS nd2, + 1 AS nd3, + 1 AS nd4, + 1 AS nd5, + 1 AS nd6, + 0 AS nd7, + CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS BIGINT) AS nd8, + 1 AS av1, + 2 AS av2, + -1 AS av3, + -3 AS av4, + 0 AS av5, + 0.5 AS av6, + NULL AS av7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS av8, + 1 AS mi1, + 2 AS mi2, + -1 AS mi3, + -3 AS mi4, + 0 AS mi5, + 0.5 AS mi6, + NULL AS mi7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS mi8, + 1 AS ma1, + 2 AS ma2, + -1 AS ma3, + -3 AS ma4, + 0 AS ma5, + 0.5 AS ma6, + NULL AS ma7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS ma8, + 1 AS an1, + 2 AS an2, + -1 AS an3, + -3 AS an4, + 0 AS an5, + 0.5 AS an6, + NULL AS an7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS an8, + 1 AS me1, + 2 AS me2, + -1 AS me3, + -3 AS me4, + 0 AS me5, + 0.5 AS me6, + NULL AS me7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS me8, + 1 AS qu1, + 2 AS qu2, + -1 AS qu3, + -3 AS qu4, + 0 AS qu5, + 0.5 AS qu6, + NULL AS qu7, + PERCENTILE_DISC(0.8) WITHIN GROUP (ORDER BY + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) NULLS LAST) AS qu8 +FROM main.sbticker +GROUP BY + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) +ORDER BY + aug_exchange diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql new file mode 100644 index 000000000..4716a0c4c --- /dev/null +++ 
b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -0,0 +1,239 @@ +WITH _t1 AS ( + SELECT + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 1 DESC) - 1.0 + ) - ( + CAST(( + COUNT(1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 1 + ELSE NULL + END AS expr_72, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 2 DESC) - 1.0 + ) - ( + CAST(( + COUNT(2) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 2 + ELSE NULL + END AS expr_73, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -1 DESC) - 1.0 + ) - ( + CAST(( + COUNT(-1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN -1 + ELSE NULL + END AS expr_74, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -3 DESC) - 1.0 + ) - ( + CAST(( + COUNT(-3) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN -3 + ELSE NULL + END AS expr_75, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0 DESC) - 1.0 + ) - ( + CAST(( + COUNT(0) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 0 + ELSE NULL + END AS expr_76, + 
CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0.5 DESC) - 1.0 + ) - ( + CAST(( + COUNT(0.5) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN 0.5 + ELSE NULL + END AS expr_77, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY NULL DESC) - 1.0 + ) - ( + CAST(( + COUNT(NULL) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN NULL + ELSE NULL + END AS expr_78, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) - 1.0 + ) - ( + CAST(( + COUNT( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ELSE NULL + END AS expr_79, + CASE + WHEN CAST(0.9 * COUNT(1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 1 DESC) + THEN 1 + ELSE NULL + END AS expr_80, + CASE + WHEN CAST(0.8 * COUNT(2) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL 
END) ORDER BY 2 DESC) + THEN 2 + ELSE NULL + END AS expr_81, + CASE + WHEN CAST(0.7 * COUNT(-1) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -1 DESC) + THEN -1 + ELSE NULL + END AS expr_82, + CASE + WHEN CAST(0.6 * COUNT(-3) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY -3 DESC) + THEN -3 + ELSE NULL + END AS expr_83, + CASE + WHEN CAST(0.5 * COUNT(0) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0 DESC) + THEN 0 + ELSE NULL + END AS expr_84, + CASE + WHEN CAST(0.4 * COUNT(0.5) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY 0.5 DESC) + THEN 0.5 + ELSE NULL + END AS expr_85, + CASE + WHEN CAST(0.30000000000000004 * COUNT(NULL) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY NULL DESC) + THEN NULL + ELSE NULL + END AS expr_86, + CASE + WHEN CAST(0.19999999999999996 * COUNT( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ) OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END)) AS INTEGER) < 
ROW_NUMBER() OVER (PARTITION BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) ORDER BY LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) DESC) + THEN LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) + ELSE NULL + END AS expr_87, + sbtickerexchange + FROM main.sbticker +) +SELECT + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, + COALESCE(COUNT(*), 0) AS su1, + COALESCE(COUNT(*) * 2, 0) AS su2, + COALESCE(COUNT(*) * -1, 0) AS su3, + COALESCE(COUNT(*) * -3, 0) AS su4, + 0 AS su5, + COALESCE(COUNT(*) * 0.5, 0) AS su6, + COALESCE(NULL, 0) AS su7, + COALESCE( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), + 0 + ) AS su8, + COUNT(*) AS co1, + COUNT(*) AS co2, + COUNT(*) AS co3, + COUNT(*) AS co4, + COUNT(*) AS co5, + COUNT(*) AS co6, + 0 AS co7, + COUNT(*) * CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS INTEGER) AS co8, + 1 AS nd1, + 1 AS nd2, + 1 AS nd3, + 1 AS nd4, + 1 AS nd5, + 1 AS nd6, + 0 AS nd7, + CAST(NOT ( + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) IS NULL + ) AS INTEGER) AS nd8, + 1 AS av1, + 2 AS av2, + -1 AS av3, + -3 AS av4, + 0 AS av5, + 0.5 AS av6, + NULL AS av7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS av8, + 1 AS mi1, + 2 AS mi2, + -1 AS mi3, + -3 AS mi4, + 0 AS mi5, + 0.5 AS mi6, + NULL AS mi7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS mi8, + 1 AS ma1, + 2 AS ma2, + -1 AS ma3, + -3 AS ma4, + 0 AS ma5, + 0.5 AS ma6, + NULL AS ma7, + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS ma8, + 1 AS an1, + 2 AS an2, + -1 AS an3, + -3 AS an4, + 0 AS an5, + 0.5 AS an6, + NULL AS an7, + LENGTH(CASE WHEN 
sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS an8, + AVG(expr_72) AS me1, + AVG(expr_73) AS me2, + AVG(expr_74) AS me3, + AVG(expr_75) AS me4, + AVG(expr_76) AS me5, + AVG(expr_77) AS me6, + AVG(expr_78) AS me7, + AVG(expr_79) AS me8, + MAX(expr_80) AS qu1, + MAX(expr_81) AS qu2, + MAX(expr_82) AS qu3, + MAX(expr_83) AS qu4, + MAX(expr_84) AS qu5, + MAX(expr_85) AS qu6, + MAX(expr_86) AS qu7, + MAX(expr_87) AS qu8 +FROM _t1 +GROUP BY + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) +ORDER BY + aug_exchange diff --git a/tests/test_sql_refsols/agg_simplification_2_ansi.sql b/tests/test_sql_refsols/agg_simplification_2_ansi.sql new file mode 100644 index 000000000..d36f0482c --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_2_ansi.sql @@ -0,0 +1,16 @@ +SELECT + sbcuststate AS state, + COUNT(DISTINCT sbcustcity) AS a1, + COALESCE(COUNT(*), 0) AS a2, + COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, + COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS BIGINT)), 0), 0) AS a4, + MIN(sbcustphone) AS a5, + MAX(sbcustphone) AS a6, + ANY_VALUE(LOWER(sbcuststate)) AS a7, + ANY_VALUE(LOWER(sbcuststate)) AS a8, + ANY_VALUE(LOWER(sbcuststate)) AS a9 +FROM main.sbcustomer +GROUP BY + sbcuststate +ORDER BY + sbcuststate diff --git a/tests/test_sql_refsols/agg_simplification_2_sqlite.sql b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql new file mode 100644 index 000000000..deee0c7a6 --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql @@ -0,0 +1,16 @@ +SELECT + sbcuststate AS state, + COUNT(DISTINCT sbcustcity) AS a1, + COALESCE(COUNT(*), 0) AS a2, + COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, + COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS INTEGER)), 0), 0) AS a4, + MIN(sbcustphone) AS a5, + MAX(sbcustphone) AS a6, + MAX(LOWER(sbcuststate)) AS a7, + MAX(LOWER(sbcuststate)) AS a8, + 
MAX(LOWER(sbcuststate)) AS a9 +FROM main.sbcustomer +GROUP BY + sbcuststate +ORDER BY + sbcuststate diff --git a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql index 4239e292e..9a9471dea 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_ansi.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -12,11 +12,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, + _s1.n_rows_1 AS total_sales, COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql index 796cac46a..9f797c2bc 100644 --- a/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_basic5_sqlite.sql @@ -1,6 +1,6 @@ WITH _s1 AS ( SELECT - COUNT(*) AS n_rows, + COUNT(*) AS n_rows_1, SUM(sale_price) AS sum_sale_price, salesperson_id FROM main.sales @@ -14,11 +14,11 @@ WITH _s1 AS ( SELECT salespersons.first_name, salespersons.last_name, - _s1.n_rows AS total_sales, + _s1.n_rows_1 AS total_sales, COALESCE(_s1.sum_sale_price, 0) AS total_revenue FROM main.salespersons AS salespersons JOIN _s1 AS _s1 ON _s1.salesperson_id = salespersons._id ORDER BY - _s1.n_rows DESC + _s1.n_rows_1 DESC LIMIT 5 From c4298cb4599d0e2cdbc584edafa90f87884844d8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 21:56:09 -0400 Subject: [PATCH 24/97] Pulled out common logic from filter/join/limit and added comments --- pydough/conversion/projection_pullup.py | 260 +++++++++++++++++------- 1 file changed, 183 
insertions(+), 77 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index a24f2e6c3..80e267b84 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -41,22 +41,46 @@ def widen_columns( node: RelationalNode, ) -> dict[RelationalExpression, RelationalExpression]: """ - TODO + Modifies a relational node in-place to ensure every column in the node's + inputs is also present in the node's output columns. Returns a substitution + mapping such that any expression pulled into the parent of the node can be + transformed to point to the node's output columns. + + Args: + `node`: The relational node to "widen" by adding more columns to. + + Returns: + A mapping that can be used for substitution if expressions from the + node are pulled up into the parent of the node. """ + # The substitution mapping that will be built by the function and returned + # to the calling site. + substitutions: dict[RelationalExpression, RelationalExpression] = {} + + # Mapping of every expression in the node's columns to a reference to the + # column of the node that points to it. This is used to keep track of which + # expressions are already present in the node's columns versus the ones that + # should be added to un-prune the node. existing_vals: dict[RelationalExpression, RelationalExpression] = { expr: ColumnReference(name, expr.data_type) for name, expr in node.columns.items() } - substitutions: dict[RelationalExpression, RelationalExpression] = {} + + # Pull all the columns from each input to the node into the node's output + # columns if they are not already in the node's output columns. for input_idx in range(len(node.inputs)): input_alias: str | None = node.default_input_aliases[input_idx] input_node: RelationalNode = node.inputs[input_idx] for name, expr in input_node.columns.items(): + # If the current node is a Join, add input names to the expression. 
if isinstance(node, Join): expr = add_input_name(expr, input_alias) ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=input_alias ) + # If the expression is not already in the node's columns, then + # inject it so the node can use it later if a pull-up occurs that + # would need to reference this expression. if expr not in existing_vals: new_name: str = name idx: int = 0 @@ -69,16 +93,35 @@ def widen_columns( substitutions[ref_expr] = new_ref else: substitutions[ref_expr] = existing_vals[expr] + + # Return the substitution mapping, without any no-op substitutions return {k: v for k, v in substitutions.items() if k != v} -def pull_non_columns(node: RelationalNode) -> RelationalNode: +def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: """ - TODO + Pulls up non-column expressions from the output columns of a Join, Filter, + or Limit node into a parent projection. + + Args: + `node`: The Join, Filter, or Limit node to pull up non-column expressions + from. + + Returns: + Either the original node if this rewrite is not applicable, or a + project node that contains the non-column expressions pulled up from + the output columns of the node, pointing to `node` as its input. """ + # The columns that will be used in the parent projection. new_project_columns: dict[str, RelationalExpression] = {} + + # A boolean to indicate if any columns were pulled besides no-ops. If this + # never becomes true, then we skip the rewrite and return the original. needs_pull: bool = False + # Iterate through the columns of the node and check if they are column + # references or not. If they are not, then we need to pull them up into + # the parent projection. 
for name, expr in node.columns.items(): if isinstance(expr, ColumnReference): new_project_columns[name] = ColumnReference(name, expr.data_type) @@ -86,59 +129,144 @@ def pull_non_columns(node: RelationalNode) -> RelationalNode: new_project_columns[name] = expr needs_pull = True + # Skip the rewrite if no columns were pulled up. if not needs_pull: return node + # Ensure every column in the node's inputs is also present in the output + # columns of the node. This will ensure that any function calls that are + # pulled into a parent projection can have their inputs substituted with + # references to the node's output columns. Ensure the substitutions do not + # have any input names in the values. substitutions: dict[RelationalExpression, RelationalExpression] = widen_columns( node ) substitutions = {k: add_input_name(v, None) for k, v in substitutions.items()} + + # Create the columns of the new projection by applying the substitutions + # to the expressions pulled up earlier. for name, expr in new_project_columns.items(): new_project_columns[name] = apply_substitution(expr, substitutions, {}) - return merge_adjacent_projects(Project(input=node, columns=new_project_columns)) + # Build the new project node pointing to the input but with the new columns. + return Project(input=node, columns=new_project_columns) -def pull_project_into_join(node: Join, input_index: int) -> None: +def pull_project_helper( + columns: dict[str, RelationalExpression], + conditions: list[RelationalExpression], + ordering: list[ExpressionSortInfo], + project: Project, + input_name: str | None, +) -> dict[RelationalExpression, RelationalExpression]: """ - TODO + Main helper utility for pulling up columns from a Project node into a + parent Filter/Join/Limit node. 
This function modifies the input project + in-place to ensure every column in the project's inputs is available + to the parent node, and returns a mapping of expressions that can be used + to substitute the columns in the parent node's output columns or conditions. + + Args: + `columns`: The columns of the parent node that the expressions from the + project node can be pulled into. + `conditions`: The condition of the parent node that the expressions + from the project node can be pulled into. This is a list so that + nodes without a condition can pass it in as empty. + `ordering`: The orderings of the parent node that the expressions from + the project node can be pulled into. This is a list so that nodes + without orderings can pass it in as empty. + `project`: The Project node to pull columns from. + `input_name`: The name of the input to the parent node that the project + node is connected to. This is used to add input names to the + expressions pulled from the project node when dealing with joins. + + Returns: + A mapping of expressions that can be used to substitute the columns in + the parent node's output columns or conditions. This mapping will + ensure columns are only pulled up if they do not contain window + functions, and they are not simultaneously used in the parent's output + while also being used in the condition or orderings. """ - if not isinstance(node.inputs[input_index], Project): - return - - project = node.inputs[input_index] - assert isinstance(project, Project) - - input_name: str | None = node.default_input_aliases[input_index] + # Ensure every column in the project's inputs is also present in the output + # columns of the project. This will ensure that any function calls that are + # pulled into the parent can have their inputs substituted with references + # to columns from the project. 
+ transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( + widen_columns(project) + ) + # Identify which columns from the project node are used in the condition + # or orderings, versus those used in the output columns of the parent. finder: ColumnReferenceFinder = ColumnReferenceFinder() + + # First, the columns used in the output columns of the parent. finder.reset() - node.condition.accept(finder) - condition_cols: set[ColumnReference] = finder.get_column_references() - condition_names: set[str] = {col.name for col in condition_cols} - finder.reset() - for expr in node.columns.values(): + for expr in columns.values(): expr.accept(finder) output_cols: set[ColumnReference] = finder.get_column_references() output_names: set[str] = {col.name for col in output_cols} - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) - ) + # Next the columns used in the condition or orderings + finder.reset() + for cond in conditions: + cond.accept(finder) + for order_expr in ordering: + order_expr.expr.accept(finder) + used_cols: set[ColumnReference] = finder.get_column_references() + used_names: set[str] = {col.name for col in used_cols} + # Iterate through the columns of the project to see which ones can be + # pulled up into the parent's output columns vs condition/orderings, + # adding them to a substitutions mapping that will be used to apply the + # transformations. 
substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): new_expr: RelationalExpression = add_input_name( apply_substitution(expr, transfer_substitutions, {}), input_name ) if (not contains_window(new_expr)) and ( - (name in condition_names) != (name in output_names) + (name in used_names) != (name in output_names) ): ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=input_name ) substitutions[ref_expr] = new_expr + return substitutions + +def pull_project_into_join(node: Join, input_index: int) -> None: + """ + Attempts to pull columns from a Project node that is an input to a Join + into the output columns of the Join node, and into its join condition. + This transformation is done in-place. + + Args: + `node`: The Join node to pull the Project columns into. + `input_index`: The index of the input to the Join node that should have + its columns pulled up, if it is a project node. + """ + + # Skip if the input at the specified input is not a Project node. + if not isinstance(node.inputs[input_index], Project): + return + project = node.inputs[input_index] + assert isinstance(project, Project) + + # Invoke the common helper for Join/Filter/Limit to identify which columns + # from the project can be pulled up into the join's output columns or + # condition, and modifies the project node in-place to ensure every + # column in the project's inputs is available to the current node. + substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper( + node.columns, + [node.condition], + [], + project, + node.default_input_aliases[input_index], + ) + ) + + # Apply the substitutions to the join's condition and output columns. 
node._condition = apply_substitution(node.condition, substitutions, {}) node._columns = { name: apply_substitution(expr, substitutions, {}) @@ -148,37 +276,27 @@ def pull_project_into_join(node: Join, input_index: int) -> None: def pull_project_into_filter(node: Filter) -> None: """ - TODO + Attempts to pull columns from a Project node that is an input to a Filter + into the output columns of the Filter node, and into the filter condition. + This transformation is done in-place. + + Args: + `node`: The Filter node to pull the Project columns into. """ + + # Skip if the filter's input is not a Project node. if not isinstance(node.input, Project): return - project: Project = node.input - - finder: ColumnReferenceFinder = ColumnReferenceFinder() - finder.reset() - node.condition.accept(finder) - condition_cols: set[ColumnReference] = finder.get_column_references() - condition_names: set[str] = {col.name for col in condition_cols} - finder.reset() - for expr in node.columns.values(): - expr.accept(finder) - output_cols: set[ColumnReference] = finder.get_column_references() - output_names: set[str] = {col.name for col in output_cols} - - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) + # Invoke the common helper for Join/Filter/Limit to identify which columns + # from the project can be pulled up into the filter's output columns or + # condition, and modifies the project node in-place to ensure every + # column in the project's inputs is available to the current node. 
+ substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper(node.columns, [node.condition], [], node.input, None) ) - substitutions: dict[RelationalExpression, RelationalExpression] = {} - for name, expr in project.columns.items(): - new_expr: RelationalExpression = apply_substitution( - expr, transfer_substitutions, {} - ) - if (not contains_window(new_expr)) and ( - (name in condition_names) != (name in output_names) - ): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = new_expr + + # Apply the substitutions to the filter's condition and output columns. node._condition = apply_substitution(node.condition, substitutions, {}) node._columns = { name: apply_substitution(expr, substitutions, {}) @@ -188,39 +306,27 @@ def pull_project_into_filter(node: Filter) -> None: def pull_project_into_limit(node: Limit) -> None: """ - TODO + Attempts to pull columns from a Project node that is an input to a Limit + into the output columns of the Limit node, and into the ordering + expressions. This transformation is done in-place. + + Args: + `node`: The Limit node to pull the Project columns into. """ + + # Skip if the limit's input is not a Project node. 
if not isinstance(node.input, Project): return - project: Project = node.input - - finder: ColumnReferenceFinder = ColumnReferenceFinder() - finder.reset() - for expr in node.columns.values(): - expr.accept(finder) - output_cols: set[ColumnReference] = finder.get_column_references() - output_names: set[str] = {col.name for col in output_cols} - - finder.reset() - for order_expr in node.orderings: - order_expr.expr.accept(finder) - order_cols: set[ColumnReference] = finder.get_column_references() - order_names: set[str] = {col.name for col in order_cols} - - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) + # Invoke the common helper for Join/Filter/Limit to identify which columns + # from the project can be pulled up into the limit's output columns or + # orderings, and modifies the project node in-place to ensure every + # column in the project's inputs is available to the current node. + substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper(node.columns, [], node.orderings, node.input, None) ) - substitutions: dict[RelationalExpression, RelationalExpression] = {} - for name, expr in project.columns.items(): - new_expr: RelationalExpression = apply_substitution( - expr, transfer_substitutions, {} - ) - if (not contains_window(new_expr)) and ( - (name in output_names) != (name in order_names) - ): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = new_expr + + # Apply the substitutions to the limit's orderings and output columns. 
node._columns = { name: apply_substitution(expr, substitutions, {}) for name, expr in node.columns.items() From 5e9f09daed856973eb06478ca42d65338a74011b Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 22:30:27 -0400 Subject: [PATCH 25/97] Added remaining comments --- pydough/conversion/projection_pullup.py | 315 +++++++++++++++--------- 1 file changed, 202 insertions(+), 113 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 80e267b84..356acca41 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -53,7 +53,7 @@ def widen_columns( A mapping that can be used for substitution if expressions from the node are pulled up into the parent of the node. """ - # The substitution mapping that will be built by the functiona nd returned + # The substitution mapping that will be built by the functions and returned # to the calling site. substitutions: dict[RelationalExpression, RelationalExpression] = {} @@ -153,28 +153,24 @@ def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: def pull_project_helper( - columns: dict[str, RelationalExpression], - conditions: list[RelationalExpression], - ordering: list[ExpressionSortInfo], + output_columns: dict[str, RelationalExpression], + used_columns: set[RelationalExpression], project: Project, input_name: str | None, ) -> dict[RelationalExpression, RelationalExpression]: """ Main helper utility for pulling up columns from a Project node into a - a parent Filter/Join/Limit node. This function modifies the input project - in-place to ensure every column in the project's inputs is available + a parent Filter/Join/Limit/Aggregate node. This function modifies the input + project in-place to ensure every column in the project's inputs is available to the parent node, and returns a mapping of expressions that can be used to substitute the columns in the parent node's output columns or conditions. 
Args: - `columns`: The columns of the parent node that the expressions from the - project node can be pulled into. - `conditions`: The condition of the parent node that the expressions - from the project node can be pulled into. This is a list so that - nodes without a condition can pass it in as empty. - `ordering`: The orderings of the parent node that the expressions from - the project node can be pulled into. This is a list so that nodes - without orderings can pass it in as empty. + `output_columns`: The columns of the parent node that the expressions + from the project node can be pulled into. + `used_columns`: The set of expressions indicating invocations of the + columns from the project in the parent node, e.g. as a filter + or join condition, limit ordering, or aggregation key. `project`: The Project node to pull columns from. `input_name`: The name of the input to the parent node that the project node is connected to. This is used to add input names to the @@ -201,17 +197,15 @@ def pull_project_helper( # First, the columns used in the output columns of the parent. finder.reset() - for expr in columns.values(): + for expr in output_columns.values(): expr.accept(finder) output_cols: set[ColumnReference] = finder.get_column_references() output_names: set[str] = {col.name for col in output_cols} - # Next the columns used in the condition or orderings + # Next the columns that are utilized by the node. 
finder.reset() - for cond in conditions: - cond.accept(finder) - for order_expr in ordering: - order_expr.expr.accept(finder) + for expr in used_columns: + expr.accept(finder) used_cols: set[ColumnReference] = finder.get_column_references() used_names: set[str] = {col.name for col in used_cols} @@ -252,15 +246,14 @@ def pull_project_into_join(node: Join, input_index: int) -> None: project = node.inputs[input_index] assert isinstance(project, Project) - # Invoke the common helper for Join/Filter/Limit to identify which columns - # from the project can be pulled up into the join's output columns or - # condition, and modifies the project node in-place to ensure every - # column in the project's inputs is available to the current node. + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the join's output + # columns or condition, and modifies the project node in-place to ensure + # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( pull_project_helper( node.columns, - [node.condition], - [], + {node.condition}, project, node.default_input_aliases[input_index], ) @@ -288,12 +281,12 @@ def pull_project_into_filter(node: Filter) -> None: if not isinstance(node.input, Project): return - # Invoke the common helper for Join/Filter/Limit to identify which columns - # from the project can be pulled up into the filter's output columns or - # condition, and modifies the project node in-place to ensure every - # column in the project's inputs is available to the current node. + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the filter's output + # columns or condition, and modifies the project node in-place to ensure + # every column in the project's inputs is available to the current node. 
substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper(node.columns, [node.condition], [], node.input, None) + pull_project_helper(node.columns, {node.condition}, node.input, None) ) # Apply the substitutions to the filter's condition and output columns. @@ -318,12 +311,17 @@ def pull_project_into_limit(node: Limit) -> None: if not isinstance(node.input, Project): return - # Invoke the common helper for Join/Filter/Limit to identify which columns - # from the project can be pulled up into the limit's output columns or - # orderings, and modifies the project node in-place to ensure every - # column in the project's inputs is available to the current node. + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the limit's output + # columns or orderings, and modifies the project node in-place to ensure + # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper(node.columns, [], node.orderings, node.input, None) + pull_project_helper( + node.columns, + {order_expr.expr for order_expr in node.orderings}, + node.input, + None, + ) ) # Apply the substitutions to the limit's orderings and output columns. 
@@ -341,76 +339,46 @@ def pull_project_into_limit(node: Limit) -> None: ] -def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: - """ - TODO - """ - if not isinstance(node.input, Project): - return node - - project: Project = node.input - - finder: ColumnReferenceFinder = ColumnReferenceFinder() - finder.reset() - for key_expr in node.aggregations.values(): - key_expr.accept(finder) - agg_cols: set[ColumnReference] = finder.get_column_references() - agg_names: set[str] = {col.name for col in agg_cols} - finder.reset() - for agg_expr in node.keys.values(): - agg_expr.accept(finder) - key_cols: set[ColumnReference] = finder.get_column_references() - key_names: set[str] = {col.name for col in key_cols} - - transfer_substitutions: dict[RelationalExpression, RelationalExpression] = ( - widen_columns(project) - ) - substitutions: dict[RelationalExpression, RelationalExpression] = {} - new_expr: RelationalExpression - for name, expr in project.columns.items(): - new_expr = apply_substitution(expr, transfer_substitutions, {}) - if (not contains_window(new_expr)) and ( - (name in agg_names) != (name in key_names) - ): - ref_expr: ColumnReference = ColumnReference(name, expr.data_type) - substitutions[ref_expr] = new_expr - new_columns: dict[str, RelationalExpression] = { - name: ColumnReference(name, expr.data_type) for name, expr in node.keys.items() - } - new_keys: dict[str, RelationalExpression] = { - name: apply_substitution(expr, substitutions, {}) - for name, expr in node.keys.items() - } - new_aggs: dict[str, CallExpression] = {} - out_expr: RelationalExpression - new_agg_expr: CallExpression | None - for name, expr in node.aggregations.items(): - new_expr = apply_substitution(expr, substitutions, {}) - assert isinstance(new_expr, CallExpression) - out_expr, new_agg_expr = simplify_agg(new_keys, new_expr, name) - new_columns[name] = out_expr - if new_agg_expr is not None: - new_aggs[name] = new_agg_expr - agg: Aggregate = Aggregate( - input=node.input, 
- keys=new_keys, - aggregations=new_aggs, - ) - return merge_adjacent_projects(Project(input=agg, columns=new_columns)) - - def simplify_agg( keys: dict[str, RelationalExpression], agg: CallExpression, name: str ) -> tuple[RelationalExpression, CallExpression | None]: """ - TODO + Simplifies an aggregation call by checking if the combination of the + function versus its inputs can be rewritten in another form. The rewrite + allows expressions to be done after aggregation since there will be a + parent projection on top of the aggregate. + + Args: + `keys`: The keys of the aggregation, used for simplifications when an + aggregation function is called on a key. + `agg`: The aggregation call to simplify. + `name`: The name of the aggregation, used to build a reference in the + parent project node to the output of the aggregation. + + Returns: + A tuple containing two terms: + - The first term is the output expression that should be used in the + parent project node to refer to the final result of the aggregation + after any post-processing is done. This may contain a reference to + column `name` of the aggregation. + - The second term is the aggregation call that should be referred to + by the parent project when deriving the final answer. If this is + `None`, then the output expression can be derived entirely in the + project and does not require an aggregation call. """ + arg: RelationalExpression + + # Build a mapping from every key expression to its name. 
reverse_keys: dict[RelationalExpression, str] = { expr: name for name, expr in keys.items() } - out_ref: RelationalExpression = ColumnReference(name, agg.data_type) - arg: RelationalExpression + # Commonly used terms: + # - Reference to the output of the aggregation + # - Literal 0 + # - Literal 1 + # - COUNT(*) call + out_ref: RelationalExpression = ColumnReference(name, agg.data_type) zero_expr: RelationalExpression = LiteralExpression(0, agg.data_type) one_expr: RelationalExpression = LiteralExpression(1, agg.data_type) count_star: CallExpression = CallExpression( @@ -466,18 +434,16 @@ def simplify_agg( agg.op == pydop.SUM and len(agg.inputs) == 1 and isinstance(agg.inputs[0], CallExpression) + and agg.inputs[0].op == pydop.DEFAULT_TO + and isinstance(agg.inputs[0].inputs[1], LiteralExpression) + and isinstance(agg.inputs[0].inputs[1].data_type, NumericType) + and agg.inputs[0].inputs[1].value == 0 ): - if ( - agg.inputs[0].op == pydop.DEFAULT_TO - and isinstance(agg.inputs[0].inputs[1], LiteralExpression) - and isinstance(agg.inputs[0].inputs[1].data_type, NumericType) - and agg.inputs[0].inputs[1].value == 0 - ): - return CallExpression( - pydop.DEFAULT_TO, agg.data_type, [out_ref, zero_expr] - ), CallExpression(pydop.SUM, agg.data_type, [agg.inputs[0].inputs[0]]) + return CallExpression( + pydop.DEFAULT_TO, agg.data_type, [out_ref, zero_expr] + ), CallExpression(pydop.SUM, agg.data_type, [agg.inputs[0].inputs[0]]) - # If the aggregation is on a key, we can just use the key. + # For many aggregations, if the argument is a key, we can just use the key. if ( agg.op in ( @@ -495,6 +461,7 @@ def simplify_agg( ): arg = agg.inputs[0] if arg in reverse_keys: + # Reference to the key from the perspective of the project. 
key_ref: RelationalExpression = ColumnReference( reverse_keys[arg], agg.data_type ) @@ -547,29 +514,117 @@ def simplify_agg( return out_ref, agg +def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: + """ + Attempts to pull columns from a Project node that is an input to an + Aggregate into the inputs of the aggregation calls of the Aggregate, and + into the grouping keys. Additionally, simplifies the aggregation calls when + possible. This transformation is done in-place. + + Args: + `node`: The Filter node to pull the Project columns into. + """ + if not isinstance(node.input, Project): + return node + + # Invoke the common helper for Join/Filter/Limit/Aggregate to identify + # which columns from the project can be pulled up into the aggregation's + # keys or used as inputs to its aggregation calls, and modifies the project + # node in-place to ensure every column in the project's inputs is available + # to the current node. + substitutions: dict[RelationalExpression, RelationalExpression] = ( + pull_project_helper( + dict(node.aggregations.items()), set(node.keys.values()), node.input, None + ) + ) + + # Build up the columns of a new project that points to all of the output + # columns of the aggregate. Start with just the keys, since the aggs will + # be added later. + new_columns: dict[str, RelationalExpression] = { + name: ColumnReference(name, expr.data_type) for name, expr in node.keys.items() + } + + # Apply the substitutions to the keys and aggregations of the aggregate. + new_keys: dict[str, RelationalExpression] = { + name: apply_substitution(expr, substitutions, {}) + for name, expr in node.keys.items() + } + + # Apply the substitutions to the aggregation calls of the aggregate, + # then try to simplify them, before updating the `new_columns`. 
+ new_aggs: dict[str, CallExpression] = {} + out_expr: RelationalExpression + new_agg_expr: CallExpression | None + for name, expr in node.aggregations.items(): + new_expr = apply_substitution(expr, substitutions, {}) + assert isinstance(new_expr, CallExpression) + # Simplify agg returns the value used in the project to store the + # answer, and the aggregation value used to derive it (if needed). If + # the aggregation value is None, then it means the aggregation was + # simplified in a way that could be derived entirely in the project. + # Otherwise, the aggregation value is referenced in the project via + # a reference to `name`. + out_expr, new_agg_expr = simplify_agg(new_keys, new_expr, name) + new_columns[name] = out_expr + if new_agg_expr is not None: + new_aggs[name] = new_agg_expr + + # Build the new aggregation with the new keys/aggs, then wrap the new + # project around it. The new project is required in case `simplify_agg` + # returned any `output_expr` values that post-process the aggregation + # results, e.g. replacing `SUM(3)` with `3 * COUNT(*)`, or `MIN(key)` with + # `key`. + agg: Aggregate = Aggregate( + input=node.input, + keys=new_keys, + aggregations=new_aggs, + ) + return merge_adjacent_projects(Project(input=agg, columns=new_columns)) + + def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: """ - TODO + Attempts to merge two adjacent Aggregate nodes into a single Aggregate + node. + + Args: + `node`: The Aggregate node to merge with its input. + + Returns: + Either the original node if the merge is not applicable, or a new + Aggregate node that uses the keys of the top aggregate node, but + modifies the aggregations to not require the original input round of + aggregation. """ + + # Skip if the input to the node is not an Aggregate. 
if not isinstance(node.input, Aggregate): return node input_agg: Aggregate = node.input + # Identify all of the keys in the top vs bottom aggregations, transposing + # the top keys so they can be expressed in the same terms as the bottom + # keys. top_keys: set[RelationalExpression] = { transpose_expression(expr, input_agg.columns) for expr in node.keys.values() } bottom_keys: set[RelationalExpression] = set(input_agg.keys.values()) + # If there are any top keys that are not present in the bottom keys, + # then the merge fails. if len(top_keys - bottom_keys) > 0: return node + # Identify any bottom keys that are not present in the top keys. This is + # needed for situations with COUNT(*) in the top aggregation. bottom_only_keys: set[RelationalExpression] = bottom_keys - top_keys - new_keys: dict[str, RelationalExpression] = { - name: transpose_expression(expr, input_agg.columns) - for name, expr in node.keys.items() - } + # Iterate across all of the aggregations in the top Aggregate node and + # transform each of them, building the result in `new_aggs`. If any of them + # cannot be transformed, then the merge fails and we return the original + # node. new_aggs: dict[str, CallExpression] = {} input_expr: RelationalExpression for agg_name, agg_expr in node.aggregations.items(): @@ -636,6 +691,10 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: # If none of the aggregations caused a merge failure, we can return a new # Aggregate node using the top keys and the merged aggregation calls. + new_keys: dict[str, RelationalExpression] = { + name: transpose_expression(expr, input_agg.columns) + for name, expr in node.keys.items() + } return Aggregate( input=input_agg.input, keys=new_keys, @@ -645,26 +704,56 @@ def merge_adjacent_aggregations(node: Aggregate) -> Aggregate: def pullup_projections(node: RelationalNode) -> RelationalNode: """ - TODO + The main recursive procedure done to perform projection pull-up. 
+ + Args: + `node`: The relational node to pull projections up from. + + Returns: + The transformed node with projections pulled up on it and all of its + descendants. """ # Recursively invoke the procedure on all inputs to the node. node = node.copy(inputs=[pullup_projections(input) for input in node.inputs]) + + # Transform the current node versus its inputs depending on the type of + # node it is. match node: + # For Root/Project, attempt to squish with the child node, if possible. case RelationalRoot() | Project(): return merge_adjacent_projects(node) + + # For Join nodes, pull projections from the left input (also the right + # for INNER joins), then eject the non-column expressions + # into a parent projection. case Join(): pull_project_into_join(node, 0) if node.join_type == JoinType.INNER: pull_project_into_join(node, 1) return pull_non_columns(node) + + # For Filter nodes, pull projections into the filter's condition and + # output columns, then eject the non-column expressions into a parent + # projection. case Filter(): pull_project_into_filter(node) return pull_non_columns(node) + + # For Limit nodes, pull projections into the limit's orderings and + # output columns, then eject the non-column expressions into a parent + # projection. case Limit(): pull_project_into_limit(node) return pull_non_columns(node) + + # For Aggregate nodes, pull projections into the aggregation keys and + # aggregations (also simplifying aggregate calls when possible), then + # merge adjacent aggregations if possible. case Aggregate(): node = merge_adjacent_aggregations(node) return pull_project_into_aggregate(node) + + # For all other nodes, just returned the node as-is since its inputs + # have already been transformed. 
case _: return node From c45b4f224ccfb6cd06015043613599bc4f522194 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 15 Jul 2025 22:31:23 -0400 Subject: [PATCH 26/97] [RUN CI] --- pydough/conversion/relational_converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 09ed9802b..f66debc38 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1454,10 +1454,10 @@ def optimize_relational_tree( # Step 10: re-run projection merging, without pushing into joins. root = confirm_root(merge_projects(root, push_into_joins=False)) - # Step 8: re-run column bubbling + # Step 11: re-run column bubbling root = bubble_column_names(root) - # Step 11: re-run column pruning. + # Step 12: re-run column pruning. root = ColumnPruner().prune_unused_columns(root) return root From 416fbad4d3a8bd85b11573a190ce6bc65e277c10 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 14:24:23 -0400 Subject: [PATCH 27/97] Added PageRank tests and fixed bugs found along the way --- pydough/conversion/hybrid_decorrelater.py | 4 +- pydough/conversion/relational_converter.py | 6 +- pydough/qdag/collections/partition_child.py | 5 +- pydough/unqualified/qualification.py | 5 +- tests/conftest.py | 117 ++++++++ tests/gen_data/init_pagerank.sql | 42 +++ tests/test_exploration.py | 6 +- tests/test_metadata/pagerank_graphs.json | 64 ++++ tests/test_pipeline_defog.py | 2 +- tests/test_pipeline_defog_custom.py | 2 +- tests/test_pipeline_pagerank.py | 256 ++++++++++++++++ tests/test_pipeline_tpch_custom.py | 2 +- tests/test_pipeline_tpch_udf.py | 2 +- tests/test_plan_refsols/common_prefix_a.txt | 6 +- tests/test_plan_refsols/common_prefix_ak.txt | 52 ++-- tests/test_plan_refsols/common_prefix_b.txt | 11 +- tests/test_plan_refsols/common_prefix_c.txt | 23 +- tests/test_plan_refsols/common_prefix_d.txt | 7 +- 
tests/test_plan_refsols/common_prefix_f.txt | 13 +- tests/test_plan_refsols/common_prefix_g.txt | 7 +- tests/test_plan_refsols/common_prefix_h.txt | 23 +- tests/test_plan_refsols/correl_15.txt | 4 +- tests/test_plan_refsols/correl_17.txt | 4 +- tests/test_plan_refsols/correl_21.txt | 4 +- tests/test_plan_refsols/correl_23.txt | 4 +- tests/test_plan_refsols/correl_33.txt | 4 +- tests/test_plan_refsols/pagerank_a0.txt | 7 + tests/test_plan_refsols/pagerank_a1.txt | 16 + tests/test_plan_refsols/pagerank_a2.txt | 28 ++ tests/test_plan_refsols/pagerank_a6.txt | 76 +++++ tests/test_plan_refsols/pagerank_b0.txt | 7 + tests/test_plan_refsols/pagerank_b1.txt | 16 + tests/test_plan_refsols/pagerank_b3.txt | 40 +++ tests/test_plan_refsols/pagerank_c4.txt | 52 ++++ tests/test_plan_refsols/pagerank_d1.txt | 16 + tests/test_plan_refsols/pagerank_d5.txt | 64 ++++ tests/test_plan_refsols/triple_partition.txt | 33 ++- .../simple_pydough_functions.py | 27 ++ tests/test_pydough_to_sql.py | 4 +- tests/test_qualification.py | 4 +- tests/test_sql_refsols/pagerank_a0_sqlite.sql | 16 + tests/test_sql_refsols/pagerank_a1_sqlite.sql | 58 ++++ tests/test_sql_refsols/pagerank_a2_sqlite.sql | 99 +++++++ tests/test_sql_refsols/pagerank_a6_sqlite.sql | 279 ++++++++++++++++++ tests/test_sql_refsols/pagerank_b0_sqlite.sql | 16 + tests/test_sql_refsols/pagerank_b1_sqlite.sql | 58 ++++ tests/test_sql_refsols/pagerank_b3_sqlite.sql | 144 +++++++++ tests/test_sql_refsols/pagerank_c4_sqlite.sql | 189 ++++++++++++ tests/test_sql_refsols/pagerank_d1_sqlite.sql | 58 ++++ tests/test_sql_refsols/pagerank_d5_sqlite.sql | 234 +++++++++++++++ tests/test_unqualified_node.py | 8 +- tests/testing_utilities.py | 38 ++- 52 files changed, 2133 insertions(+), 129 deletions(-) create mode 100644 tests/gen_data/init_pagerank.sql create mode 100644 tests/test_metadata/pagerank_graphs.json create mode 100644 tests/test_pipeline_pagerank.py create mode 100644 tests/test_plan_refsols/pagerank_a0.txt create mode 100644 
tests/test_plan_refsols/pagerank_a1.txt create mode 100644 tests/test_plan_refsols/pagerank_a2.txt create mode 100644 tests/test_plan_refsols/pagerank_a6.txt create mode 100644 tests/test_plan_refsols/pagerank_b0.txt create mode 100644 tests/test_plan_refsols/pagerank_b1.txt create mode 100644 tests/test_plan_refsols/pagerank_b3.txt create mode 100644 tests/test_plan_refsols/pagerank_c4.txt create mode 100644 tests/test_plan_refsols/pagerank_d1.txt create mode 100644 tests/test_plan_refsols/pagerank_d5.txt create mode 100644 tests/test_sql_refsols/pagerank_a0_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_a1_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_a2_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_a6_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_b0_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_b1_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_b3_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_c4_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_d1_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_d5_sqlite.sql diff --git a/pydough/conversion/hybrid_decorrelater.py b/pydough/conversion/hybrid_decorrelater.py index 120bd1b9c..5412735d1 100644 --- a/pydough/conversion/hybrid_decorrelater.py +++ b/pydough/conversion/hybrid_decorrelater.py @@ -294,8 +294,8 @@ def correl_ref_purge( new_parent_uni_keys, ) if isinstance(operation, HybridCalculate): - for str, expr in operation.new_expressions.items(): - operation.new_expressions[str] = operation.terms[name] + for name, expr in operation.new_expressions.items(): + operation.new_expressions[name] = operation.terms[name] if isinstance(operation, HybridFilter): operation.condition = self.remove_correl_refs( operation.condition, diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 956bb3698..1e51e9df6 100644 --- 
a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1023,7 +1023,7 @@ def translate_calculate( # it relative to the input context. for name in node.new_expressions: name = node.renamings.get(name, name) - hybrid_expr: HybridExpr = node.terms[name] + hybrid_expr: HybridExpr = node.new_expressions[name] ref_expr: HybridRefExpr = HybridRefExpr(name, hybrid_expr.typ) rel_expr: RelationalExpression = self.translate_expression( hybrid_expr, context @@ -1416,6 +1416,10 @@ def optimize_relational_tree( Returns: The optimized relational root. """ + + # Step 0: prune unused columns. + root = ColumnPruner().prune_unused_columns(root) + # Step 1: push filters down as far as possible root._input = push_filters(root.input, set()) diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index e0609e658..da181ec44 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -97,7 +97,10 @@ def get_term(self, term_name: str): if term_name in self.inherited_downstreamed_terms: context: PyDoughCollectionQDAG = self.child_access while term_name not in context.all_terms: - if context is self.child_access: + if ( + context is self.child_access + and term_name in self.ancestor_context.inherited_downstreamed_terms + ): context = self.ancestor_context else: assert context.ancestor_context is not None diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 3430ab7e0..a6dddf1f6 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -881,7 +881,7 @@ def split_partition_ancestry( Returns: A tuple where the first element is the ancestor of all the data being partitioned, the second is the data being partitioned which - now points to an root instead of hte original ancestor, and the + now points to an root instead of the original ancestor, and the third is a list of the ancestor 
names. """ @@ -903,6 +903,7 @@ def split_partition_ancestry( | UnqualifiedOrderBy() | UnqualifiedSingular() | UnqualifiedPartition() + | UnqualifiedBest() ): parent: UnqualifiedNode = node._parcel[0] new_ancestry, new_child, ancestry_names = self.split_partition_ancestry( @@ -963,6 +964,8 @@ def split_partition_ancestry( build_node[0] = UnqualifiedOrderBy(build_node[0], *node._parcel[1:]) case UnqualifiedSingular(): build_node[0] = UnqualifiedSingular(build_node[0], *node._parcel[1:]) + case UnqualifiedBest(): + build_node[0] = UnqualifiedBest(build_node[0], *node._parcel[1:]) case _: # Any other unqualified node would mean something is malformed. raise PyDoughUnqualifiedException( diff --git a/tests/conftest.py b/tests/conftest.py index 32695a9a0..44de583c5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -425,3 +425,120 @@ def sqlite_technograph_connection() -> DatabaseContext: # Return the database context. return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE) + + +@pytest.fixture(scope="session") +def get_pagerank_graph() -> graph_fetcher: + """ + A function that returns the graph used for PageRank calculations. + """ + + @cache + def impl(name: str) -> GraphMetadata: + return pydough.parse_json_metadata_from_file( + file_path=f"{os.path.dirname(__file__)}/test_metadata/pagerank_graphs.json", + graph_name="PAGERANK", + ) + + return impl + + +@pytest.fixture(scope="session") +def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: + """ + Returns the SQLITE database contexts for the various pagerank database. + """ + # Setup the directory to be the main PyDough directory. 
+ base_dir: str = os.path.dirname(os.path.dirname(__file__)) + + # Outputs verfied via https://pagerank-visualizer.netlify.app/ + pagerank_configs = [ + ("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)]), + ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]), + ( + "PAGERANK_C", + 8, + [ + (1, 2), + (1, 6), + (2, 1), + (2, 5), + (2, 6), + (3, 2), + (4, 2), + (4, 5), + (5, 3), + (7, 8), + (8, 7), + ], + ), + ( + "PAGERANK_D", + 16, + [ + (1, 2), + (1, 3), + (1, 4), + (1, 5), + (2, 1), + (2, 5), + (3, 2), + (4, 2), + (4, 5), + (4, 11), + (5, 3), + (5, 11), + (5, 14), + (5, 16), + (6, 7), + (7, 8), + (8, 6), + (8, 7), + (9, 2), + (9, 10), + (11, 12), + (12, 13), + (12, 14), + (13, 4), + (13, 5), + (15, 2), + ], + ), + ] + + # Setup the pagerank databases. + result: dict[str, DatabaseContext] = {} + for name, nodes, vertices in pagerank_configs: + subprocess.run( + f"cd tests; rm -fv gen_data/{name.lower()}.db; sqlite3 gen_data/{name.lower()}.db < gen_data/init_pagerank.sql", + shell=True, + ) + path: str = os.path.join(base_dir, f"tests/gen_data/{name.lower()}.db") + connection: sqlite3.Connection = sqlite3.connect(path) + cursor: sqlite3.Cursor = connection.cursor() + for site in range(nodes): + cursor.execute( + "INSERT INTO SITES VALUES (?, ?)", + (site + 1, f"SITE {chr(ord('A') + site)}"), + ) + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site + 1, site + 1), + ) + no_links: set[int] = set(range(1, nodes + 1)) + for src, dst in vertices: + no_links.discard(src) + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (src, dst), + ) + for site in no_links: + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, None), + ) + cursor.connection.commit() + result[name] = DatabaseContext( + DatabaseConnection(connection), DatabaseDialect.SQLITE + ) + return result diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql new file mode 100644 index 000000000..cb6758196 --- /dev/null +++ 
b/tests/gen_data/init_pagerank.sql @@ -0,0 +1,42 @@ +-- TODO + +CREATE TABLE SITES ( + s_key INTEGER NOT NULL, + s_name TEXT NOT NULL +); + +CREATE TABLE LINKS ( + l_source INTEGER NOT NULL, + l_target INTEGER +); + +-- INSERT INTO SITES (s_key, s_name) VALUES +-- (1, 'Site A'), +-- (2, 'Site B'), +-- (3, 'Site C'), +-- (4, 'Site D'), +-- (5, 'Site E') +-- ; + +-- INSERT INTO LINKS (l_source, l_target) VALUES +-- (1, 2), (1, 3), (1, 4), (1, 5), +-- (2, 1), (2, 3), +-- (3, NULL), +-- (4, 1), (4, 2), (4, 3), +-- (5, 1), (5, 4) +-- ; + +-- INSERT INTO SITES (s_key, s_name) VALUES +-- (1, 'Site A'), +-- (2, 'Site B'), +-- (3, 'Site C'), +-- (4, 'Site D') +-- ; + +-- INSERT INTO LINKS (l_source, l_target) VALUES +-- (1, 2), +-- (2, 1), (2, 3), +-- (3, 4), +-- (4, 1), (4, 2) +-- ; + diff --git a/tests/test_exploration.py b/tests/test_exploration.py index e9adc64a8..0e3780069 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1289,7 +1289,7 @@ def test_graph_structure( ) def unqualified_exploration_test_data( request, -) -> tuple[str, Callable[[], UnqualifiedNode], str, str]: +) -> tuple[str, Callable[..., UnqualifiedNode], str, str]: """ Testing data used for test_unqualified_node_exploration. Returns a tuple of the graph name to use, a function that takes in a graph and returns the @@ -1298,7 +1298,7 @@ def unqualified_exploration_test_data( without verbose mode. 
""" graph_name: str = request.param[0] - test_impl: Callable[[], UnqualifiedNode] = request.param[1] + test_impl: Callable[..., UnqualifiedNode] = request.param[1] verbose_refsol: str = request.param[2] non_verbose_refsol: str = request.param[3] return graph_name, test_impl, verbose_refsol.strip(), non_verbose_refsol.strip() @@ -1313,7 +1313,7 @@ def unqualified_exploration_test_data( ) def test_unqualified_node_exploration( unqualified_exploration_test_data: tuple[ - str, Callable[[], UnqualifiedNode], str, str + str, Callable[..., UnqualifiedNode], str, str ], verbose: bool, get_sample_graph: graph_fetcher, diff --git a/tests/test_metadata/pagerank_graphs.json b/tests/test_metadata/pagerank_graphs.json new file mode 100644 index 000000000..bd4150ceb --- /dev/null +++ b/tests/test_metadata/pagerank_graphs.json @@ -0,0 +1,64 @@ +[ + { + "name": "PAGERANK", + "version": "V2", + "collections": [ + { + "name": "sites", + "type": "simple table", + "table path": "main.SITES", + "unique properties": ["key"], + "properties": [ + {"name": "key", "type": "table column", "column name": "s_key", "data type": "numeric"}, + {"name": "name", "type": "table column", "column name": "s_name", "data type": "string"} + ] + }, + { + "name": "links", + "type": "simple table", + "table path": "main.LINKS", + "unique properties": [["source_key", "target_key"]], + "properties": [ + {"name": "source_key", "type": "table column", "column name": "l_source", "data type": "numeric"}, + {"name": "target_key", "type": "table column", "column name": "l_target", "data type": "numeric"} + ] + } + ], + "relationships": [ + { + "type": "simple join", + "name": "outgoing_links", + "parent collection": "sites", + "child collection": "links", + "singular": false, + "always matches": true, + "keys": {"key": ["source_key"]} + }, + { + "type": "reverse", + "name": "source_site", + "original parent": "sites", + "original property": "outgoing_links", + "singular": true, + "always matches": true + }, + { + 
"type": "general join", + "name": "incoming_links", + "parent collection": "sites", + "child collection": "links", + "singular": false, + "always matches": true, + "condition": "ABSENT(other.target_key) | (self.key == other.target_key)" + }, + { + "type": "reverse", + "name": "target_site", + "original parent": "sites", + "original property": "incoming_links", + "singular": false, + "always matches": true + } + ] + } +] \ No newline at end of file diff --git a/tests/test_pipeline_defog.py b/tests/test_pipeline_defog.py index e21eefc11..f22ee3bca 100644 --- a/tests/test_pipeline_defog.py +++ b/tests/test_pipeline_defog.py @@ -1122,7 +1122,7 @@ def test_defog_until_sql( """ Tests the conversion of the defog analytical questions to SQL. """ - unqualified_impl: Callable[[], UnqualifiedNode] = ( + unqualified_impl: Callable[..., UnqualifiedNode] = ( defog_pipeline_test_data.pydough_function ) graph_name: str = defog_pipeline_test_data.graph_name diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index eea8fb774..f254c58fa 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1647,7 +1647,7 @@ def test_pipeline_e2e_defog_custom( ], ) def test_defog_e2e_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], graph_name: str, error_message: str, defog_graphs: graph_fetcher, diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py new file mode 100644 index 000000000..ead18dd81 --- /dev/null +++ b/tests/test_pipeline_pagerank.py @@ -0,0 +1,256 @@ +""" +Integration tests for the PyDough workflow with custom questions on the TPC-H +dataset. 
+""" + +from collections.abc import Callable + +import pandas as pd +import pytest + +from pydough.database_connectors import DatabaseContext, DatabaseDialect +from tests.test_pydough_functions.simple_pydough_functions import pagerank + +from .testing_utilities import PyDoughPandasTest, graph_fetcher + + +@pytest.fixture( + params=[ + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.25] * 4, + } + ), + "pagerank_a0", + order_sensitive=True, + args=[0], + ), + id="pagerank_a0", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.25, 0.35625, 0.14375, 0.25], + } + ), + "pagerank_a1", + order_sensitive=True, + args=[1], + ), + id="pagerank_a1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.29516, 0.35625, 0.18891, 0.15969], + } + ), + "pagerank_a2", + order_sensitive=True, + args=[2], + ), + id="pagerank_a2", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_A", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4], + "page_rank": [0.27205, 0.34791, 0.18787, 0.19218], + } + ), + "pagerank_a6", + order_sensitive=True, + args=[6], + ), + id="pagerank_a6", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_B", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.2] * 5, + } + ), + "pagerank_b0", + order_sensitive=True, + args=[0], + ), + id="pagerank_b0", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_B", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.115, 0.455, 0.2, 0.03, 0.2], + } + ), + "pagerank_b1", + order_sensitive=True, + args=[1], + ), + id="pagerank_b1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_B", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.16196, 0.40262, 
0.23071, 0.03, 0.17471], + } + ), + "pagerank_b3", + order_sensitive=True, + args=[3], + ), + id="pagerank_b3", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_D", + lambda: pd.DataFrame( + { + "key": range(1, 17), + "page_rank": [ + 0.0459, + 0.18314, + 0.0459, + 0.05918, + 0.10345, + 0.0459, + 0.09902, + 0.07246, + 0.01934, + 0.0459, + 0.05033, + 0.07246, + 0.0459, + 0.05918, + 0.01934, + 0.03262, + ], + } + ), + "pagerank_d1", + order_sensitive=True, + args=[1], + ), + id="pagerank_d1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_D", + lambda: pd.DataFrame( + { + "key": range(1, 17), + "page_rank": [ + 0.06896, + 0.11157, + 0.05385, + 0.04884, + 0.11486, + 0.05966, + 0.10651, + 0.10618, + 0.01647, + 0.02365, + 0.05369, + 0.06529, + 0.04508, + 0.06876, + 0.01647, + 0.04015, + ], + } + ), + "pagerank_d5", + order_sensitive=True, + args=[5], + ), + id="pagerank_d5", + ), + ], +) +def pagerank_pipeline_test_data(request) -> PyDoughPandasTest: + """ + Test data for e2e tests on custom queries using the TPC-H database and + sqlite UDFs. Returns an instance of PyDoughPandasTest containing + information about the test. + """ + return request.param + + +def test_pipeline_until_relational_pagerank( + pagerank_pipeline_test_data: PyDoughPandasTest, + get_pagerank_graph: graph_fetcher, + get_plan_test_filename: Callable[[str], str], + update_tests: bool, +) -> None: + """ + Verifies the generated relational plans for the pagerank tests. 
+ """ + file_path: str = get_plan_test_filename(pagerank_pipeline_test_data.test_name) + pagerank_pipeline_test_data.run_relational_test( + get_pagerank_graph, file_path, update_tests + ) + + +def test_pipeline_until_sql_pagerank( + pagerank_pipeline_test_data: PyDoughPandasTest, + get_pagerank_graph: graph_fetcher, + get_sql_test_filename: Callable[[str, DatabaseDialect], str], + sqlite_pagerank_db_contexts: dict[str, DatabaseContext], + update_tests: bool, +) -> None: + """ + Verifies the generated SQL for the pagerank tests. + """ + ctx: DatabaseContext = sqlite_pagerank_db_contexts[ + pagerank_pipeline_test_data.graph_name + ] + file_path: str = get_sql_test_filename( + pagerank_pipeline_test_data.test_name, ctx.dialect + ) + pagerank_pipeline_test_data.run_sql_test( + get_pagerank_graph, file_path, update_tests, ctx + ) + + +@pytest.mark.execute +def test_pipeline_e2e_pagerank( + pagerank_pipeline_test_data: PyDoughPandasTest, + get_pagerank_graph: graph_fetcher, + sqlite_pagerank_db_contexts: dict[str, DatabaseContext], +): + """ + Verifies the final output answer for the pagerank tests. 
+ """ + pagerank_pipeline_test_data.run_e2e_test( + get_pagerank_graph, + sqlite_pagerank_db_contexts[pagerank_pipeline_test_data.graph_name], + ) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 56a44feb8..e7ee39ad6 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -3312,7 +3312,7 @@ def test_pipeline_e2e_tpch_custom( ], ) def test_pipeline_e2e_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], columns: dict[str, str] | list[str] | None, error_message: str, get_sample_graph: graph_fetcher, diff --git a/tests/test_pipeline_tpch_udf.py b/tests/test_pipeline_tpch_udf.py index 42ec6c57d..6ef51125e 100644 --- a/tests/test_pipeline_tpch_udf.py +++ b/tests/test_pipeline_tpch_udf.py @@ -437,7 +437,7 @@ def test_pipeline_e2e_tpch_sqlite_udf( ], ) def test_pipeline_tpch_sqlite_udf_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], error_message: str, get_udf_graph: graph_fetcher, ): diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index e595a8632..4c60d19f0 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 
'n_rows': t1.n_rows}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations)}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t0.n_nations, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations': 1:numeric, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 905b6bb93..5a0d2c111 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cust), ('n_machine_high_orders', n_machine_high_orders), ('n_machine_high_domestic_lines', n_machine_high_domestic_lines)], orderings=[(anything_n_name):asc_first]) - PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) - FILTER(condition=sum_sum_n_rows > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + PROJECT(columns={'anything_n_name': anything_n_name, 'n_machine_cust': DEFAULT_TO(n_rows, 0:numeric), 'n_machine_high_domestic_lines': DEFAULT_TO(sum_sum_sum_sum_n_rows, 0:numeric), 'n_machine_high_orders': DEFAULT_TO(sum_n_rows, 0:numeric)}) + FILTER(condition=sum_sum_sum_sum_n_rows > 0:numeric & sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_sum_n_rows': sum_sum_sum_sum_n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_sum_sum_n_rows': SUM(sum_sum_sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,8 +10,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cus SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': 
SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'sum_sum_n_rows': t1.sum_sum_n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) @@ -22,23 +22,25 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_machine_cus SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) 
- JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': 
l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows}) + AGGREGATE(keys={'c_custkey': c_custkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'c_custkey': t0.c_custkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'o_orderkey': t0.o_orderkey}) + AGGREGATE(keys={'c_custkey': c_custkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) 
+ JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 76bd980c0..63b2ed66a 100644 --- 
a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,13 +1,12 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations_0), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations_0': t0.n_nations_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) - PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations_0': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': SUM(n_nations), 'n_suppliers': SUM(n_suppliers)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t0.n_nations, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + PROJECT(columns={'n_nationkey': n_nationkey, 'n_nations': 1:numeric, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index b37c12250..e0bb5bd1f 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), ('n_parts', sum_sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', n_orders), ('n_parts', n_parts)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'n_parts': t1.n_parts, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_1, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_sum_expr_18_0, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 
'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_1, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_parts': sum_agg_22, 'n_regionkey': n_regionkey, 'n_suppliers': sum_sum_expr_18, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_22': SUM(agg_22), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -12,10 +12,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': SUM(expr_18), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18': t0.expr_18, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + PROJECT(columns={'expr_18': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index d379695ad..2a50927f0 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -3,7 +3,7 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'n_nations': sum_agg_1, 'n_orders_94': DEFAULT_TO(sum_sum_expr_7, 0:numeric), 'n_orders_95': DEFAULT_TO(sum_sum_expr_10, 0:numeric), 'n_orders_96': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'n_suppliers': sum_agg_29, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': SUM(agg_1), 'sum_agg_29': 
SUM(agg_29), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_29': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr_10': sum_expr_10, 'sum_expr_7': sum_expr_7, 'sum_n_rows': sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -21,6 +21,5 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_ AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': 
s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index c71a26a59..2b51e5998 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,13 +1,12 @@ -ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_agg_8)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t1.sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': SUM(agg_1), 'sum_agg_8': SUM(agg_8)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.agg_1, 'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) PROJECT(columns={'agg_1': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_n_rows': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 171bc4f3e..2dc58c1e0 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -2,12 +2,11 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_ JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': SUM(agg_2)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.agg_2, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.agg_2, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) PROJECT(columns={'agg_2': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows}) 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_suppliers': SUM(n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 17e99a1c8..a9bfb0e30 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,9 +1,9 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_sum_sum_n_rows), ('n_suppliers', sum_sum_sum_expr_18_0)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_sum_expr_18_0': t1.sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders), ('n_customers', sum_n_rows), ('n_parts', sum_agg_22), ('n_suppliers', sum_sum_expr_18)], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.n_nations, 'n_orders': t1.n_orders, 'r_name': t0.r_name, 'sum_agg_22': t1.sum_agg_22, 'sum_n_rows': 
t1.sum_n_rows, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'n_nations': sum_agg_0, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_expr_18_0': sum_sum_sum_expr_18_0, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_expr_18_0': SUM(sum_sum_expr_18_0), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_0': t0.agg_0, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_expr_18_0': t1.sum_sum_expr_18_0, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + PROJECT(columns={'n_nations': sum_agg_0, 'n_orders': DEFAULT_TO(sum_sum_n_rows, 0:numeric), 'n_regionkey': n_regionkey, 'sum_agg_22': sum_agg_22, 'sum_n_rows': sum_n_rows, 'sum_sum_expr_18': sum_sum_expr_18}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': SUM(agg_0), 'sum_agg_22': SUM(agg_22), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_0': t0.agg_0, 'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) PROJECT(columns={'agg_0': 1:numeric, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) @@ -12,10 +12,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', n_orders) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_sum_expr_18_0': SUM(sum_expr_18_0), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18_0': SUM(expr_18_0), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18_0': t0.expr_18_0, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) - PROJECT(columns={'expr_18_0': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': SUM(expr_18), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'expr_18': t0.expr_18, 'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + PROJECT(columns={'expr_18': 1:numeric, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index d31f6b6ff..841e83d3d 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n', n_rows)], 
orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n', n)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={}) FILTER(condition=p_retailprice < global_avg_price * 0.85:numeric & p_retailprice < ps_supplycost * 1.5:numeric & p_retailprice < supplier_avg_price, columns={'s_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'p_retailprice': t1.p_retailprice, 'ps_supplycost': t0.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 51fe077d4..8b74cc17a 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('fullname', fname)], orderings=[(fname):asc_first]) - PROJECT(columns={'fname': JOIN_STRINGS('-':string, LOWER(r_name), lname)}) +ROOT(columns=[('fullname', fullname)], orderings=[(fullname):asc_first]) + PROJECT(columns={'fullname': JOIN_STRINGS('-':string, LOWER(r_name), lname)}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'lname': t0.lname, 'r_name': t1.r_name}) PROJECT(columns={'lname': LOWER(n_name), 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_21.txt b/tests/test_plan_refsols/correl_21.txt index b3da2efc6..4f028747f 100644 --- a/tests/test_plan_refsols/correl_21.txt +++ b/tests/test_plan_refsols/correl_21.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n_sizes', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) FILTER(condition=n_rows > avg_n_parts, columns={}) 
JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_n_parts': t0.avg_n_parts, 'n_rows': t1.n_rows}) AGGREGATE(keys={}, aggregations={'avg_n_parts': AVG(n_parts)}) diff --git a/tests/test_plan_refsols/correl_23.txt b/tests/test_plan_refsols/correl_23.txt index d884fa852..d9450143b 100644 --- a/tests/test_plan_refsols/correl_23.txt +++ b/tests/test_plan_refsols/correl_23.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n_sizes', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) FILTER(condition=n_rows > avg_n_combo, columns={}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_n_combo': t0.avg_n_combo, 'n_rows': t1.n_rows}) AGGREGATE(keys={}, aggregations={'avg_n_combo': AVG(n_combos)}) diff --git a/tests/test_plan_refsols/correl_33.txt b/tests/test_plan_refsols/correl_33.txt index 2f9917c6a..4b3ac6075 100644 --- a/tests/test_plan_refsols/correl_33.txt +++ b/tests/test_plan_refsols/correl_33.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) +ROOT(columns=[('n', n)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n': COUNT()}) JOIN(condition=MONTH(t1.o_orderdate) == MONTH(t0.first_order_date) & YEAR(t1.o_orderdate) == YEAR(t0.first_order_date), type=INNER, cardinality=PLURAL_UNKNOWN, columns={}) AGGREGATE(keys={}, aggregations={'first_order_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/pagerank_a0.txt b/tests/test_plan_refsols/pagerank_a0.txt new file mode 100644 index 000000000..89355a8da --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a0.txt @@ -0,0 +1,7 @@ +ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) + PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': 
ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) + PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt new file mode 100644 index 000000000..28da857bf --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -0,0 +1,16 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 
'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt new file mode 100644 index 000000000..d79575279 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -0,0 +1,28 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': 
t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 
'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt new file mode 100644 index 000000000..c1eec12ef --- /dev/null +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -0,0 +1,76 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + 
PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], 
partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': 
t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], 
order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * 
RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + 
SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b0.txt b/tests/test_plan_refsols/pagerank_b0.txt new file mode 100644 index 000000000..89355a8da --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b0.txt @@ -0,0 +1,7 @@ +ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) + PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) + PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source}) diff --git a/tests/test_plan_refsols/pagerank_b1.txt b/tests/test_plan_refsols/pagerank_b1.txt new file mode 100644 index 000000000..28da857bf --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b1.txt @@ -0,0 +1,16 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt new file mode 100644 index 000000000..e02382e76 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -0,0 +1,40 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 
'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 
'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + 
PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt new file mode 100644 index 000000000..f063615ae --- /dev/null +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -0,0 +1,52 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | 
t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': 
t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + 
PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': 
l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d1.txt b/tests/test_plan_refsols/pagerank_d1.txt new file mode 100644 index 000000000..28da857bf --- /dev/null +++ b/tests/test_plan_refsols/pagerank_d1.txt @@ -0,0 +1,16 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + 
SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt new file mode 100644 index 000000000..a3ca6ed77 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -0,0 +1,64 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 
'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + 
PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + 
PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) + 
AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) + FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/triple_partition.txt 
b/tests/test_plan_refsols/triple_partition.txt index 7193ed6f9..80c4fe8aa 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -2,22 +2,23 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[( AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(percentage)}) PROJECT(columns={'percentage': 100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric), 'supp_region': supp_region}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) - AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) - FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': SUM(n_instances)}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + AGGREGATE(keys={'o_custkey': o_custkey, 'p_type': p_type, 'r_name': r_name}, aggregations={'n_instances': COUNT()}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) + 
FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 6fc5248d8..250faeee6 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3030,3 +3030,30 @@ def quantile_function_test_4(): orders_99_percent=QUANTILE(selected_orders.total_price, 0.99), orders_max=QUANTILE(selected_orders.total_price, 1.0), ) + + +def pagerank(n_iters): + d = 
0.85 + n_out_expr = SUM( + outgoing_links.CALCULATE( + n_target=IFF(ABSENT(target_key), n, INTEGER((source_key != target_key))) + ).n_target + ) + source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) + for i in range(n_iters): + group_name = f"s{i}" + source = ( + source.outgoing_links.CALCULATE( + consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)) + ) + .target_site.PARTITION(name=group_name, by=key) + .target_site.CALCULATE( + n, + page_rank=(1.0 - d) / n + + d * RELSUM(consider_link * page_rank / n_out, per=group_name), + ) + .BEST(per=group_name, by=key.ASC()) + ) + if i < n_iters - 1: + source = source.CALCULATE(n_out=n_out_expr) + return source.CALCULATE(key, page_rank=ROUND(page_rank, 5)).ORDER_BY(key.ASC()) diff --git a/tests/test_pydough_to_sql.py b/tests/test_pydough_to_sql.py index 2c0a13b85..da64086e4 100644 --- a/tests/test_pydough_to_sql.py +++ b/tests/test_pydough_to_sql.py @@ -195,7 +195,7 @@ ], ) def test_pydough_to_sql_tpch( - pydough_code: Callable[[], UnqualifiedNode], + pydough_code: Callable[..., UnqualifiedNode], columns: dict[str, str] | list[str] | None, test_name: str, get_sample_graph: graph_fetcher, @@ -278,7 +278,7 @@ def test_pydough_to_sql_tpch( ], ) def test_pydough_to_sql_defog( - pydough_code: Callable[[], UnqualifiedNode], + pydough_code: Callable[..., UnqualifiedNode], test_name: str, graph_name: str, defog_graphs: graph_fetcher, diff --git a/tests/test_qualification.py b/tests/test_qualification.py index aa5da57fa..17ddbc553 100644 --- a/tests/test_qualification.py +++ b/tests/test_qualification.py @@ -945,7 +945,7 @@ ], ) def test_qualify_node_to_ast_string( - impl: Callable[[], UnqualifiedNode], + impl: Callable[..., UnqualifiedNode], answer_tree_str: str, get_sample_graph: graph_fetcher, default_config: PyDoughConfigs, @@ -1047,7 +1047,7 @@ def test_qualify_node_to_ast_string( ], ) def test_qualify_node_collation( - impl: Callable[[], UnqualifiedNode], + impl: Callable[..., 
UnqualifiedNode], answer_tree_str: str, collation_default_asc: bool, propagate_collation: bool, diff --git a/tests/test_sql_refsols/pagerank_a0_sqlite.sql b/tests/test_sql_refsols/pagerank_a0_sqlite.sql new file mode 100644 index 000000000..4fe8f22be --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a0_sqlite.sql @@ -0,0 +1,16 @@ +WITH _s0 AS ( + SELECT + COUNT(*) OVER () AS agg_2, + s_key + FROM main.sites +) +SELECT + MAX(_s0.s_key) AS key, + ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank +FROM _s0 AS _s0 +JOIN main.links AS links + ON _s0.s_key = links.l_source +GROUP BY + _s0.s_key +ORDER BY + MAX(_s0.s_key) diff --git a/tests/test_sql_refsols/pagerank_a1_sqlite.sql b/tests/test_sql_refsols/pagerank_a1_sqlite.sql new file mode 100644 index 000000000..549289485 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a1_sqlite.sql @@ -0,0 +1,58 @@ +WITH _t8 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t8 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t9 + ON _s2.anything_s_key = _t9.l_source + JOIN _t8 AS _s5 + ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank 
+FROM _t +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_a2_sqlite.sql b/tests/test_sql_refsols/pagerank_a2_sqlite.sql new file mode 100644 index 000000000..32a7c2048 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a2_sqlite.sql @@ -0,0 +1,99 @@ +WITH _t14 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t14 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t15 + ON _s2.anything_s_key = _t15.l_source + JOIN _t14 AS _s5 + ON _s5.s_key = _t15.l_target OR _t15.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) 
OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t16 + ON _s8.anything_s_key = _t16.l_source + JOIN _t14 AS _s11 + ON _s11.s_key = _t16.l_target OR _t16.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_2 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_a6_sqlite.sql b/tests/test_sql_refsols/pagerank_a6_sqlite.sql new file mode 100644 index 000000000..9bdfb807c --- /dev/null +++ b/tests/test_sql_refsols/pagerank_a6_sqlite.sql @@ -0,0 +1,279 @@ +WITH _t38 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t38 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t33 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t39.l_source <> _t39.l_target OR _t39.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t39 + ON _s2.anything_s_key = _t39.l_source + JOIN _t38 AS _s5 + ON _s5.s_key = _t39.l_target OR _t39.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t33 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + 
MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t27 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t40.l_source <> _t40.l_target OR _t40.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t40 + ON _s8.anything_s_key = _t40.l_source + JOIN _t38 AS _s11 + ON _s11.s_key = _t40.l_target OR _t40.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t27 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t21 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t41.l_source <> _t41.l_target OR _t41.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s14.anything_anything_anything_n, + _s17.s_key + FROM _s14 AS _s14 + JOIN _s1 AS _t41 + ON _s14.anything_s_key = _t41.l_source + JOIN _t38 AS _s17 + ON _s17.s_key = _t41.l_target OR _t41.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t21 +), _s20 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s19.l_target IS NULL, + _t.anything_anything_anything_n, + 
CAST(_s19.l_source <> _s19.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_3 AS _t + JOIN _s1 AS _s19 + ON _s19.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t42.l_source <> _t42.l_target OR _t42.l_target IS NULL AS INTEGER) * _s20.page_rank + ) AS REAL) / _s20.n_out + ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, + _s20.anything_anything_anything_anything_n, + _s23.s_key + FROM _s20 AS _s20 + JOIN _s1 AS _t42 + ON _s20.anything_s_key = _t42.l_source + JOIN _t38 AS _s23 + ON _s23.s_key = _t42.l_target OR _t42.l_target IS NULL +), _t_4 AS ( + SELECT + page_rank_0, + anything_anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s26 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s25.l_target IS NULL, + _t.anything_anything_anything_anything_n, + CAST(_s25.l_source <> _s25.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_4 AS _t + JOIN _s1 AS _s25 + ON _s25.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t43.l_source <> _t43.l_target OR _t43.l_target IS NULL AS INTEGER) * _s26.page_rank + ) AS REAL) / _s26.n_out + ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, + _s26.anything_anything_anything_anything_anything_n, + _s29.s_key + FROM _s26 AS _s26 + JOIN _s1 AS _t43 + ON _s26.anything_s_key = _t43.l_source + JOIN _t38 AS _s29 + ON _s29.s_key = 
_t43.l_target OR _t43.l_target IS NULL +), _t_5 AS ( + SELECT + page_rank_0, + anything_anything_anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s32 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s31.l_target IS NULL, + _t.anything_anything_anything_anything_anything_n, + CAST(_s31.l_source <> _s31.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_5 AS _t + JOIN _s1 AS _s31 + ON _s31.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s32.anything_anything_anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t44.l_source <> _t44.l_target OR _t44.l_target IS NULL AS INTEGER) * _s32.page_rank + ) AS REAL) / _s32.n_out + ) OVER (PARTITION BY _s35.s_key) AS page_rank_0, + _s35.s_key + FROM _s32 AS _s32 + JOIN _s1 AS _t44 + ON _s32.anything_s_key = _t44.l_source + JOIN _t38 AS _s35 + ON _s35.s_key = _t44.l_target OR _t44.l_target IS NULL +), _t_6 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_6 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_b0_sqlite.sql b/tests/test_sql_refsols/pagerank_b0_sqlite.sql new file mode 100644 index 000000000..4fe8f22be --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b0_sqlite.sql @@ -0,0 +1,16 @@ +WITH _s0 AS ( + SELECT + COUNT(*) OVER () AS agg_2, + s_key + FROM main.sites +) +SELECT + MAX(_s0.s_key) AS key, + ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank +FROM _s0 AS _s0 +JOIN main.links AS links + ON _s0.s_key = links.l_source +GROUP BY + _s0.s_key +ORDER BY + MAX(_s0.s_key) diff --git 
a/tests/test_sql_refsols/pagerank_b1_sqlite.sql b/tests/test_sql_refsols/pagerank_b1_sqlite.sql new file mode 100644 index 000000000..549289485 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b1_sqlite.sql @@ -0,0 +1,58 @@ +WITH _t8 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t8 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t9 + ON _s2.anything_s_key = _t9.l_source + JOIN _t8 AS _s5 + ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql new file mode 100644 index 000000000..00086a1c7 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -0,0 +1,144 @@ +WITH _t20 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t20 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS 
page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t21 + ON _s2.anything_s_key = _t21.l_source + JOIN _t20 AS _s5 + ON _s5.s_key = _t21.l_target OR _t21.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t22 + ON _s8.anything_s_key = _t22.l_source + JOIN _t20 AS _s11 + ON _s11.s_key = _t22.l_target OR _t22.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS 
anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s17.s_key + FROM _s14 AS _s14 + JOIN _s1 AS _t23 + ON _s14.anything_s_key = _t23.l_source + JOIN _t20 AS _s17 + ON _s17.s_key = _t23.l_target OR _t23.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_3 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_c4_sqlite.sql b/tests/test_sql_refsols/pagerank_c4_sqlite.sql new file mode 100644 index 000000000..591e82f3d --- /dev/null +++ b/tests/test_sql_refsols/pagerank_c4_sqlite.sql @@ -0,0 +1,189 @@ +WITH _t26 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t26 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t21 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t27 + ON _s2.anything_s_key = 
_t27.l_source + JOIN _t26 AS _s5 + ON _s5.s_key = _t27.l_target OR _t27.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t21 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t28 + ON _s8.anything_s_key = _t28.l_source + JOIN _t26 AS _s11 + ON _s11.s_key = _t28.l_target OR _t28.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s14.anything_anything_anything_n, + _s17.s_key + FROM _s14 AS _s14 
+ JOIN _s1 AS _t29 + ON _s14.anything_s_key = _t29.l_source + JOIN _t26 AS _s17 + ON _s17.s_key = _t29.l_target OR _t29.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s20 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s19.l_target IS NULL, + _t.anything_anything_anything_n, + CAST(_s19.l_source <> _s19.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_3 AS _t + JOIN _s1 AS _s19 + ON _s19.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t30.l_source <> _t30.l_target OR _t30.l_target IS NULL AS INTEGER) * _s20.page_rank + ) AS REAL) / _s20.n_out + ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, + _s23.s_key + FROM _s20 AS _s20 + JOIN _s1 AS _t30 + ON _s20.anything_s_key = _t30.l_source + JOIN _t26 AS _s23 + ON _s23.s_key = _t30.l_target OR _t30.l_target IS NULL +), _t_4 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_4 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_d1_sqlite.sql b/tests/test_sql_refsols/pagerank_d1_sqlite.sql new file mode 100644 index 000000000..549289485 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_d1_sqlite.sql @@ -0,0 +1,58 @@ +WITH _t8 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t8 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + 
CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t9 + ON _s2.anything_s_key = _t9.l_source + JOIN _t8 AS _s5 + ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql new file mode 100644 index 000000000..09af3379b --- /dev/null +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -0,0 +1,234 @@ +WITH _t32 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t32 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t27 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t33.l_source <> _t33.l_target OR _t33.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t33 + ON _s2.anything_s_key = _t33.l_source + JOIN 
_t32 AS _s5 + ON _s5.s_key = _t33.l_target OR _t33.l_target IS NULL +), _t AS ( + SELECT + page_rank_0, + anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t27 +), _s8 AS ( + SELECT + COALESCE( + SUM( + IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_n) AS anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t AS _t + JOIN _s1 AS _s7 + ON _s7.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t21 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t34.l_source <> _t34.l_target OR _t34.l_target IS NULL AS INTEGER) * _s8.page_rank + ) AS REAL) / _s8.n_out + ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, + _s8.anything_anything_n, + _s11.s_key + FROM _s8 AS _s8 + JOIN _s1 AS _t34 + ON _s8.anything_s_key = _t34.l_source + JOIN _t32 AS _s11 + ON _s11.s_key = _t34.l_target OR _t34.l_target IS NULL +), _t_2 AS ( + SELECT + page_rank_0, + anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t21 +), _s14 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s13.l_target IS NULL, + _t.anything_anything_n, + CAST(_s13.l_source <> _s13.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_n) AS anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_2 AS _t + JOIN _s1 AS _s13 + ON _s13.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t15 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t35.l_source <> _t35.l_target OR _t35.l_target IS NULL AS INTEGER) * _s14.page_rank + ) AS REAL) / _s14.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, + _s14.anything_anything_anything_n, + _s17.s_key + FROM _s14 AS _s14 + JOIN _s1 AS _t35 
+ ON _s14.anything_s_key = _t35.l_source + JOIN _t32 AS _s17 + ON _s17.s_key = _t35.l_target OR _t35.l_target IS NULL +), _t_3 AS ( + SELECT + page_rank_0, + anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t15 +), _s20 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s19.l_target IS NULL, + _t.anything_anything_anything_n, + CAST(_s19.l_source <> _s19.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_3 AS _t + JOIN _s1 AS _s19 + ON _s19.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t9 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t36.l_source <> _t36.l_target OR _t36.l_target IS NULL AS INTEGER) * _s20.page_rank + ) AS REAL) / _s20.n_out + ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, + _s20.anything_anything_anything_anything_n, + _s23.s_key + FROM _s20 AS _s20 + JOIN _s1 AS _t36 + ON _s20.anything_s_key = _t36.l_source + JOIN _t32 AS _s23 + ON _s23.s_key = _t36.l_target OR _t36.l_target IS NULL +), _t_4 AS ( + SELECT + page_rank_0, + anything_anything_anything_anything_n, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t9 +), _s26 AS ( + SELECT + COALESCE( + SUM( + IIF( + _s25.l_target IS NULL, + _t.anything_anything_anything_anything_n, + CAST(_s25.l_source <> _s25.l_target AS INTEGER) + ) + ), + 0 + ) AS n_out, + MAX(_t.page_rank_0) AS page_rank, + MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, + MAX(_t.s_key) AS anything_s_key + FROM _t_4 AS _t + JOIN _s1 AS _s25 + ON _s25.l_source = _t.s_key + WHERE + _t._w = 1 + GROUP BY + _t.s_key +), _t3 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + ) + 0.85 * 
SUM( + CAST(( + CAST(_t37.l_source <> _t37.l_target OR _t37.l_target IS NULL AS INTEGER) * _s26.page_rank + ) AS REAL) / _s26.n_out + ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, + _s29.s_key + FROM _s26 AS _s26 + JOIN _s1 AS _t37 + ON _s26.anything_s_key = _t37.l_source + JOIN _t32 AS _s29 + ON _s29.s_key = _t37.l_target OR _t37.l_target IS NULL +), _t_5 AS ( + SELECT + page_rank_0, + s_key, + ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w + FROM _t3 +) +SELECT + s_key AS key, + ROUND(page_rank_0, 5) AS page_rank +FROM _t_5 +WHERE + _w = 1 +ORDER BY + s_key diff --git a/tests/test_unqualified_node.py b/tests/test_unqualified_node.py index 1e04d6d29..0baebad45 100644 --- a/tests/test_unqualified_node.py +++ b/tests/test_unqualified_node.py @@ -569,7 +569,7 @@ def test_unqualified_to_string( ], ) def test_init_pydough_context( - func: Callable[[], UnqualifiedNode], + func: Callable[..., UnqualifiedNode], as_string: str, get_sample_graph: graph_fetcher, ) -> None: @@ -579,7 +579,7 @@ def test_init_pydough_context( at least based on string representation. """ sample_graph: GraphMetadata = get_sample_graph("TPCH") - new_func: Callable[[], UnqualifiedNode] = init_pydough_context(sample_graph)(func) + new_func: Callable[..., UnqualifiedNode] = init_pydough_context(sample_graph)(func) answer: UnqualifiedNode = new_func() assert repr(answer) == as_string, ( "Mismatch between string representation of unqualified nodes and expected output" @@ -748,7 +748,7 @@ def test_init_pydough_context( ], ) def test_unqualified_errors( - func: Callable[[], UnqualifiedNode], + func: Callable[..., UnqualifiedNode], error_msg: str, get_sample_graph: graph_fetcher, ) -> None: @@ -757,6 +757,6 @@ def test_unqualified_errors( exception during the conversion to unqualified nodes. 
""" sample_graph: GraphMetadata = get_sample_graph("TPCH") - new_func: Callable[[], UnqualifiedNode] = init_pydough_context(sample_graph)(func) + new_func: Callable[..., UnqualifiedNode] = init_pydough_context(sample_graph)(func) with pytest.raises(Exception, match=error_msg): new_func() diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 8c55689b2..d95d35562 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -904,8 +904,9 @@ def make_relational_ordering( def transform_and_exec_pydough( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], graph: GraphMetadata, + args: list[Any] | None, ) -> UnqualifiedNode: """ Obtains the unqualified node from a PyDough function by invoking the @@ -914,12 +915,14 @@ def transform_and_exec_pydough( Args: `pydough_impl`: The PyDough function to be transformed and executed. `graph`: The metadata being used. + `args`: The arguments to pass to the PyDough function, if any. Returns: The unqualified node created by running the transformed version of `pydough_impl`. """ - return init_pydough_context(graph)(pydough_impl)() + args = args if args is not None else [] + return init_pydough_context(graph)(pydough_impl)(*args) @dataclass @@ -930,7 +933,7 @@ class PyDoughSQLComparisonTest: SQL query. """ - pydough_function: Callable[[], UnqualifiedNode] + pydough_function: Callable[..., UnqualifiedNode] """ Function that returns the PyDough code evaluated by the unit test. 
""" @@ -988,7 +991,9 @@ def run_e2e_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, None + ) # Obtain the DataFrame result from the PyDough code call_kwargs: dict = {"metadata": graph, "database": database} @@ -1037,9 +1042,10 @@ class PyDoughPandasTest: - `fix_column_names` (optional): if True, ignore whatever column names are in the output and just use the same column names as in the reference solution. + - `args` (optional): additional arguments to pass to the PyDough function. """ - pydough_function: Callable[[], UnqualifiedNode] + pydough_function: Callable[..., UnqualifiedNode] """ Function that returns the PyDough code evaluated by the unit test. """ @@ -1078,6 +1084,12 @@ class PyDoughPandasTest: same column names as in the reference solution. """ + args: list[Any] | None = None + """ + Any additional arguments to pass to the PyDough function when + executing it. If None, no additional arguments are passed. + """ + def run_relational_test( self, fetcher: graph_fetcher, @@ -1101,7 +1113,9 @@ def run_relational_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, self.args + ) # Run the PyDough code through the pipeline up until it is converted to # a relational plan. 
@@ -1153,7 +1167,9 @@ def run_sql_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, self.args + ) # Convert the PyDough code to SQL text call_kwargs: dict = {"metadata": graph, "database": database} @@ -1196,7 +1212,9 @@ def run_e2e_test( """ # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) - root: UnqualifiedNode = transform_and_exec_pydough(self.pydough_function, graph) + root: UnqualifiedNode = transform_and_exec_pydough( + self.pydough_function, graph, self.args + ) # Obtain the DataFrame result from the PyDough code call_kwargs: dict = { @@ -1229,7 +1247,7 @@ def run_e2e_test( def run_e2e_error_test( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode], error_message: str, graph: GraphMetadata, columns: dict[str, str] | list[str] | None = None, @@ -1250,7 +1268,7 @@ def run_e2e_error_test( `config`: The PyDough configuration to use for the test, if any. 
""" with pytest.raises(Exception, match=error_message): - root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph) + root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph, None) call_kwargs: dict = {} if graph is not None: call_kwargs["metadata"] = graph From b121c313b16eb49bbcce082714e056dd7f144dd7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 14:40:40 -0400 Subject: [PATCH 28/97] Started adding comments --- tests/conftest.py | 14 +++++-- tests/gen_data/init_pagerank.sql | 40 ++++--------------- .../simple_pydough_functions.py | 15 +++++++ 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 44de583c5..bbe8cd7d9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -525,18 +525,26 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: "INSERT INTO LINKS VALUES (?, ?)", (site + 1, site + 1), ) - no_links: set[int] = set(range(1, nodes + 1)) + no_incoming: set[int] = set(range(1, nodes + 1)) + no_outgoing: set[int] = set(range(1, nodes + 1)) for src, dst in vertices: - no_links.discard(src) + no_outgoing.discard(src) + no_incoming.discard(dst) cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (src, dst), ) - for site in no_links: + for site in no_outgoing: cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (site, None), ) + if len(no_outgoing) == 0: + for site in no_incoming: + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, site), + ) cursor.connection.commit() result[name] = DatabaseContext( DatabaseConnection(connection), DatabaseDialect.SQLITE diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql index cb6758196..503d6fd9b 100644 --- a/tests/gen_data/init_pagerank.sql +++ b/tests/gen_data/init_pagerank.sql @@ -1,4 +1,11 @@ --- TODO +-- Custom SQL schema to initialize a custom PageRank database with tables for +-- web sites and links between them. The following assumptions are made: +-- 1. 
Websites without any outgoing links have an edge (key, NULL) in the LINKS +-- table, to denote that the page implicitly links to all other pages. +-- 2. If there are no websites without any outgoing links, then any websites +-- without incoming links have a dummy self-link for simplicity, which +-- should not be counted in the PageRank calculation (but is required for +-- joins to work). CREATE TABLE SITES ( s_key INTEGER NOT NULL, @@ -9,34 +16,3 @@ CREATE TABLE LINKS ( l_source INTEGER NOT NULL, l_target INTEGER ); - --- INSERT INTO SITES (s_key, s_name) VALUES --- (1, 'Site A'), --- (2, 'Site B'), --- (3, 'Site C'), --- (4, 'Site D'), --- (5, 'Site E') --- ; - --- INSERT INTO LINKS (l_source, l_target) VALUES --- (1, 2), (1, 3), (1, 4), (1, 5), --- (2, 1), (2, 3), --- (3, NULL), --- (4, 1), (4, 2), (4, 3), --- (5, 1), (5, 4) --- ; - --- INSERT INTO SITES (s_key, s_name) VALUES --- (1, 'Site A'), --- (2, 'Site B'), --- (3, 'Site C'), --- (4, 'Site D') --- ; - --- INSERT INTO LINKS (l_source, l_target) VALUES --- (1, 2), --- (2, 1), (2, 3), --- (3, 4), --- (4, 1), (4, 2) --- ; - diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 250faeee6..107106ad9 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3033,6 +3033,21 @@ def quantile_function_test_4(): def pagerank(n_iters): + """ + Computes the PageRank computation on the PAGERANK graph, starting with the + base page_rank values with an even distribution of 1.0 / n, where n is the + number of sites in the graph, then iteratively updates the page_rank values + based on the outgoing links and the damping factor d. Repeats the process + for n_iters iterations, returning the final page_rank values for each site, + rounded to 5 decimal places. 
Makes the following assumptions: + + - If a site has no outgoing links, then it has a single entry in + `outgoing_links` where `target_key` is null. + - If there is a site with no incoming links, and there are no sites w/o + any outgoing links, the site w/o the incoming link has a dummy link where + the source & target key are the same, which should be ignored in the + PageRank calculation. + """ d = 0.85 n_out_expr = SUM( outgoing_links.CALCULATE( From 05f71471d86e59928ae729384dfc2d69e6d9d983 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 14:40:54 -0400 Subject: [PATCH 29/97] Started adding comments --- tests/gen_data/init_pagerank.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql index 503d6fd9b..c94ba5957 100644 --- a/tests/gen_data/init_pagerank.sql +++ b/tests/gen_data/init_pagerank.sql @@ -5,7 +5,7 @@ -- 2. If there are no websites without any outgoing links, then any websites -- without incoming links have a dummy self-link for simplicity, which -- should not be counted in the PageRank calculation (but is required for --- joins to work). +-- joins to work). CREATE TABLE SITES ( s_key INTEGER NOT NULL, From 2697b108ee2827867cb712b1176d54c212baeff1 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 15:48:30 -0400 Subject: [PATCH 30/97] Added comments --- tests/conftest.py | 35 +++++++++++++++---- tests/gen_data/gen_technograph.py | 1 + tests/test_pipeline_pagerank.py | 3 +- .../simple_pydough_functions.py | 19 ++++++++++ 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bbe8cd7d9..1369621fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -430,7 +430,9 @@ def sqlite_technograph_connection() -> DatabaseContext: @pytest.fixture(scope="session") def get_pagerank_graph() -> graph_fetcher: """ - A function that returns the graph used for PageRank calculations. 
+ A function that returns the graph used for PageRank calculations. The same + graph is used for all PageRank tests, but different databases are used that + adhere to the same table schema setup that the graph invokes. """ @cache @@ -447,11 +449,18 @@ def impl(name: str) -> GraphMetadata: def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: """ Returns the SQLITE database contexts for the various pagerank database. + This is returned as a dictionary mapping the name of the database to the + DatabaseContext for that database, all of which adhere to the same + schema structure assumed by the PAGERANK graph. """ # Setup the directory to be the main PyDough directory. base_dir: str = os.path.dirname(os.path.dirname(__file__)) - # Outputs verfied via https://pagerank-visualizer.netlify.app/ + # The configurations for the pagerank databases. Each tuple contains: + # - The name of the database. + # - The number of nodes n in the graph. + # - The edges in the graph as a list of tuples (src, dst), assuming the + # nodes are numbered from 1 to n. pagerank_configs = [ ("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)]), ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]), @@ -506,9 +515,10 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ), ] - # Setup the pagerank databases. + # Setup each of the the pagerank databases using the configurations. result: dict[str, DatabaseContext] = {} for name, nodes, vertices in pagerank_configs: + # Create the database and ensure it is empty. 
subprocess.run( f"cd tests; rm -fv gen_data/{name.lower()}.db; sqlite3 gen_data/{name.lower()}.db < gen_data/init_pagerank.sql", shell=True, @@ -516,15 +526,16 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: path: str = os.path.join(base_dir, f"tests/gen_data/{name.lower()}.db") connection: sqlite3.Connection = sqlite3.connect(path) cursor: sqlite3.Cursor = connection.cursor() + + # For every node, insert an entry into the SITES table. for site in range(nodes): cursor.execute( "INSERT INTO SITES VALUES (?, ?)", (site + 1, f"SITE {chr(ord('A') + site)}"), ) - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site + 1, site + 1), - ) + + # For every edge, insert an entry into the LINKS table. Keep track of + # the nodes that have no incoming or outgoing links. no_incoming: set[int] = set(range(1, nodes + 1)) no_outgoing: set[int] = set(range(1, nodes + 1)) for src, dst in vertices: @@ -534,18 +545,28 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: "INSERT INTO LINKS VALUES (?, ?)", (src, dst), ) + + # If there are no outgoing links for a site, insert a NULL link for it, + # indicating that the site links to ALL sites. for site in no_outgoing: cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (site, None), ) + + # IF there are no nodes without outgoing links, then for each node + # without incoming links, insert a dummy link to itself. if len(no_outgoing) == 0: for site in no_incoming: cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (site, site), ) + + # Commit the changes, close the cursor, and store the context in the + # result dictionary. 
cursor.connection.commit() + cursor.close() result[name] = DatabaseContext( DatabaseConnection(connection), DatabaseDialect.SQLITE ) diff --git a/tests/gen_data/gen_technograph.py b/tests/gen_data/gen_technograph.py index 75ca6cdfd..521ee7861 100644 --- a/tests/gen_data/gen_technograph.py +++ b/tests/gen_data/gen_technograph.py @@ -827,3 +827,4 @@ def gen_technograph_records(cursor: sqlite3.Cursor) -> None: ) cursor.connection.commit() + cursor.close() diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index ead18dd81..7f1eeb6f8 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -228,7 +228,8 @@ def test_pipeline_until_sql_pagerank( update_tests: bool, ) -> None: """ - Verifies the generated SQL for the pagerank tests. + Verifies the generated SQL for the pagerank tests. The outputs were + generated using this website: https://pagerank-visualizer.netlify.app/. """ ctx: DatabaseContext = sqlite_pagerank_db_contexts[ pagerank_pipeline_test_data.graph_name diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 107106ad9..df2aa17f9 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3048,15 +3048,31 @@ def pagerank(n_iters): the source & target key are the same, which should be ignored in the PageRank calculation. """ + + # The dampening factor d = 0.85 + + # The expression used to determine the number of sites the graph links to, + # accounting for sites without links (which implicitly link to everything) + # and sites with a dummy link to themselves (which should be ignored). n_out_expr = SUM( outgoing_links.CALCULATE( n_target=IFF(ABSENT(target_key), n, INTEGER((source_key != target_key))) ).n_target ) + + # The seed value for the PageRank computation, which is evenly distributed. 
+ # Also computes the number of sites in the graph, which is used downstream. source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) + + # Repeats the following procedure for n_iters iterations to build the next + # generation of PageRank values from the current generation. for i in range(n_iters): group_name = f"s{i}" + # For each site, find all sites that it links to and accumulate the + # PageRank values from the current site (divided by the # of links) in + # those linked sites, while also considering the damping factor. Calls + # .BEST() to ensure each site is included exactly once at the end. source = ( source.outgoing_links.CALCULATE( consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)) @@ -3069,6 +3085,9 @@ def pagerank(n_iters): ) .BEST(per=group_name, by=key.ASC()) ) + # Unless we are done, re-derive `n_out` for the current node. if i < n_iters - 1: source = source.CALCULATE(n_out=n_out_expr) + + # Output the final PageRank values, rounded to 5 decimal places, return source.CALCULATE(key, page_rank=ROUND(page_rank, 5)).ORDER_BY(key.ASC()) From 94726ff80bb55e797d43432c29c0c00002d8ecae Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 16:10:23 -0400 Subject: [PATCH 31/97] Fixing c4 test and refactoring the PageRank impl to be simpler & more performant with fewer window functions [RUN CI] --- tests/conftest.py | 18 +- tests/gen_data/init_pagerank.sql | 6 +- tests/test_pipeline_pagerank.py | 25 ++ tests/test_plan_refsols/pagerank_a1.txt | 10 +- tests/test_plan_refsols/pagerank_a2.txt | 45 ++- tests/test_plan_refsols/pagerank_a6.txt | 113 +++---- tests/test_plan_refsols/pagerank_b1.txt | 10 +- tests/test_plan_refsols/pagerank_b3.txt | 58 ++-- tests/test_plan_refsols/pagerank_c4.txt | 79 ++--- tests/test_plan_refsols/pagerank_d1.txt | 10 +- tests/test_plan_refsols/pagerank_d5.txt | 92 +++--- .../simple_pydough_functions.py | 25 +- tests/test_sql_refsols/pagerank_a1_sqlite.sql | 27 +- 
tests/test_sql_refsols/pagerank_a2_sqlite.sql | 80 ++--- tests/test_sql_refsols/pagerank_a6_sqlite.sql | 300 +++++------------- tests/test_sql_refsols/pagerank_b1_sqlite.sql | 27 +- tests/test_sql_refsols/pagerank_b3_sqlite.sql | 135 +++----- tests/test_sql_refsols/pagerank_c4_sqlite.sql | 190 ++++------- tests/test_sql_refsols/pagerank_d1_sqlite.sql | 27 +- tests/test_sql_refsols/pagerank_d5_sqlite.sql | 245 +++++--------- 20 files changed, 537 insertions(+), 985 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1369621fb..ec28ed893 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -535,12 +535,10 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ) # For every edge, insert an entry into the LINKS table. Keep track of - # the nodes that have no incoming or outgoing links. - no_incoming: set[int] = set(range(1, nodes + 1)) + # the nodes that have no outgoing links. no_outgoing: set[int] = set(range(1, nodes + 1)) for src, dst in vertices: no_outgoing.discard(src) - no_incoming.discard(dst) cursor.execute( "INSERT INTO LINKS VALUES (?, ?)", (src, dst), @@ -554,14 +552,12 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: (site, None), ) - # IF there are no nodes without outgoing links, then for each node - # without incoming links, insert a dummy link to itself. - if len(no_outgoing) == 0: - for site in no_incoming: - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site, site), - ) + # Insert a dummy self-link for every site. + for site in range(1, nodes + 1): + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, site), + ) # Commit the changes, close the cursor, and store the context in the # result dictionary. diff --git a/tests/gen_data/init_pagerank.sql b/tests/gen_data/init_pagerank.sql index c94ba5957..b9de0181d 100644 --- a/tests/gen_data/init_pagerank.sql +++ b/tests/gen_data/init_pagerank.sql @@ -2,10 +2,8 @@ -- web sites and links between them. 
The following assumptions are made: -- 1. Websites without any outgoing links have an edge (key, NULL) in the LINKS -- table, to denote that the page implicitly links to all other pages. --- 2. If there are no websites without any outgoing links, then any websites --- without incoming links have a dummy self-link for simplicity, which --- should not be counted in the PageRank calculation (but is required for --- joins to work). +-- 2. Every website has a self-link (key, key) in the LINKS table, which should +-- be ignored in PageRank calculations. CREATE TABLE SITES ( s_key INTEGER NOT NULL, diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 7f1eeb6f8..c8a846e5f 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -128,6 +128,31 @@ ), id="pagerank_b3", ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_C", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5, 6, 7, 8], + "page_rank": [ + 0.08996, + 0.19353, + 0.11764, + 0.03252, + 0.10377, + 0.12682, + 0.16788, + 0.16788, + ], + } + ), + "pagerank_c4", + order_sensitive=True, + args=[4], + ), + id="pagerank_c4", + ), pytest.param( PyDoughPandasTest( pagerank, diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index 28da857bf..b3053e78b 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,9 +1,9 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) @@ -11,6 +11,6 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):as PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': 
l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index d79575279..16646f254 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,28 +1,23 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, 
aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': 
t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_20, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_18, columns={'page_rank_0_20': page_rank_0_20, 's_key': s_key}) + PROJECT(columns={'dummy_link_18': dummy_link_18, 'page_rank_0_20': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_19 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t0.consider_link_19, 'dummy_link_18': t0.dummy_link_18, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t1.consider_link_19, 'dummy_link_18': t1.dummy_link_18, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * 
RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_19': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_18': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 
'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index c1eec12ef..f29c64b32 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,76 +1,51 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_anything_n), 'anything_page_rank_0': 
ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 
'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 
'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': 
t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 
anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_590, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_588, columns={'page_rank_0_590': page_rank_0_590, 's_key': s_key}) + PROJECT(columns={'dummy_link_588': dummy_link_588, 'page_rank_0_590': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_589 * page_rank_0_580 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t0.consider_link_589, 'dummy_link_588': t0.dummy_link_588, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t1.consider_link_589, 'dummy_link_588': t1.dummy_link_588, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580}) + FILTER(condition=dummy_link_578, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_580': page_rank_0_580, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_578': dummy_link_578, 'n_out': n_out, 'page_rank_0_580': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_579 * page_rank_0_570 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t0.consider_link_579, 'dummy_link_578': t0.dummy_link_578, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t1.consider_link_579, 'dummy_link_578': t1.dummy_link_578, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570}) + FILTER(condition=dummy_link_568, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_570': page_rank_0_570, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_568': dummy_link_568, 'n_out': n_out, 'page_rank_0_570': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_569 * page_rank_0_560 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t0.consider_link_569, 'dummy_link_568': t0.dummy_link_568, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t1.consider_link_569, 'dummy_link_568': t1.dummy_link_568, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560}) + FILTER(condition=dummy_link_558, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_560': page_rank_0_560, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_558': dummy_link_558, 'n_out': n_out, 'page_rank_0_560': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_559 * page_rank_0_550 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t0.consider_link_559, 'dummy_link_558': t0.dummy_link_558, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t1.consider_link_559, 'dummy_link_558': t1.dummy_link_558, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550}) + FILTER(condition=dummy_link_548, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_550': page_rank_0_550, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_548': dummy_link_548, 'n_out': n_out, 'page_rank_0_550': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_549 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t0.consider_link_549, 'dummy_link_548': t0.dummy_link_548, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t1.consider_link_549, 'dummy_link_548': t1.dummy_link_548, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) 
+ PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_549': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_548': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_559': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_558': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_569': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_568': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_579': INTEGER(ABSENT(l_target) | l_source != 
l_target), 'dummy_link_578': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_589': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_588': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b1.txt b/tests/test_plan_refsols/pagerank_b1.txt index 28da857bf..b3053e78b 100644 --- a/tests/test_plan_refsols/pagerank_b1.txt +++ b/tests/test_plan_refsols/pagerank_b1.txt @@ -1,9 +1,9 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 
0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) @@ -11,6 +11,6 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):as PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index e02382e76..d78322a4d 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ 
b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,40 +1,30 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': 
t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 
'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) + PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / 
anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) + FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / 
n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != 
l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_47': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_46': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_57': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_56': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index f063615ae..1c504e853 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,52 +1,37 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': 
t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': 
t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': 
anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) 
- JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_134, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_132, columns={'page_rank_0_134': page_rank_0_134, 's_key': s_key}) + PROJECT(columns={'dummy_link_132': dummy_link_132, 'page_rank_0_134': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_133 * page_rank_0_124 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_133': t0.consider_link_133, 'dummy_link_132': t0.dummy_link_132, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_133': t1.consider_link_133, 'dummy_link_132': t1.dummy_link_132, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124}) + FILTER(condition=dummy_link_122, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_124': page_rank_0_124, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_122': dummy_link_122, 'n_out': n_out, 'page_rank_0_124': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_123 * page_rank_0_114 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t0.consider_link_123, 'dummy_link_122': t0.dummy_link_122, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t1.consider_link_123, 'dummy_link_122': t1.dummy_link_122, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114}) + FILTER(condition=dummy_link_112, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_114': page_rank_0_114, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_112': dummy_link_112, 'n_out': n_out, 'page_rank_0_114': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_113 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_113': t0.consider_link_113, 'dummy_link_112': t0.dummy_link_112, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_113': t1.consider_link_113, 'dummy_link_112': t1.dummy_link_112, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != 
l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_113': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_112': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_123': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_122': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_133': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_132': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d1.txt b/tests/test_plan_refsols/pagerank_d1.txt index 28da857bf..b3053e78b 100644 --- a/tests/test_plan_refsols/pagerank_d1.txt +++ b/tests/test_plan_refsols/pagerank_d1.txt @@ -1,9 +1,9 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - 
PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) @@ -11,6 +11,6 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], 
orderings=[(s_key):as PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index a3ca6ed77..fb3b3c239 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,64 +1,44 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_anything_n': t0.anything_anything_anything_anything_anything_n, 
'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_anything_n': anything_anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key 
== t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_anything_n': t0.anything_anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_anything_n': anything_anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_anything_n': ANYTHING(anything_anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - 
JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_anything_n': t0.anything_anything_anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_anything_n': anything_anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_anything_n': ANYTHING(anything_anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'n_target': IFF(ABSENT(l_target), anything_anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_anything_n': anything_anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_anything_n': t0.anything_anything_n, 'consider_link': 
t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_anything_n': anything_anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': anything_page_rank_0}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_anything_n': ANYTHING(anything_n), 'anything_page_rank_0': ANYTHING(page_rank_0), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'anything_n': anything_n, 'n_target': IFF(ABSENT(l_target), anything_n, INTEGER(l_source != l_target)), 'page_rank_0': page_rank_0, 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t0.s_key}) - FILTER(condition=RANKING(args=[], partition=[s_key], order=[(s_key):asc_last], allow_ties=False) == 1:numeric, columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 
'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) + PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': 
t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) + FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) + FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': 
t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) + FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': 
t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_255': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_254': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_265': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_264': PRESENT(l_target) & 
l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link_275': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_274': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_285': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_284': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index df2aa17f9..5fd4aad86 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3062,32 +3062,35 @@ def pagerank(n_iters): ) # The seed value for the PageRank computation, which is evenly distributed. - # Also computes the number of sites in the graph, which is used downstream. + # Also computes the number of sites in the graph & the number of sites each + # site links to, which are both used downstream. source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) # Repeats the following procedure for n_iters iterations to build the next # generation of PageRank values from the current generation. 
for i in range(n_iters): - group_name = f"s{i}" # For each site, find all sites that it links to and accumulate the # PageRank values from the current site (divided by the # of links) in - # those linked sites, while also considering the damping factor. Calls - # .BEST() to ensure each site is included exactly once at the end. + # those linked sites, while also considering the damping factor. Uses + # RELSUM after partitioning on the destination site to perform the + # accumulation, then filters to only keep the one row of the + # destination site that came from the self-link. This ensures that each + # site is included once after each iteration, and the `n_out` value for + # that site is daisy-chained to the next iteration. source = ( source.outgoing_links.CALCULATE( - consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)) + dummy_link=PRESENT(target_key) & (source_key == target_key), + consider_link=INTEGER(ABSENT(target_key) | (source_key != target_key)), ) - .target_site.PARTITION(name=group_name, by=key) + .target_site.PARTITION(name=f"s{i}", by=key) .target_site.CALCULATE( n, + n_out, page_rank=(1.0 - d) / n - + d * RELSUM(consider_link * page_rank / n_out, per=group_name), + + d * RELSUM(consider_link * page_rank / n_out, per=f"s{i}"), ) - .BEST(per=group_name, by=key.ASC()) + .WHERE(dummy_link) ) - # Unless we are done, re-derive `n_out` for the current node. 
- if i < n_iters - 1: - source = source.CALCULATE(n_out=n_out_expr) # Output the final PageRank values, rounded to 5 decimal places, return source.CALCULATE(key, page_rank=ROUND(page_rank, 5)).ORDER_BY(key.ASC()) diff --git a/tests/test_sql_refsols/pagerank_a1_sqlite.sql b/tests/test_sql_refsols/pagerank_a1_sqlite.sql index 549289485..d7965056f 100644 --- a/tests/test_sql_refsols/pagerank_a1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a1_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t8 AS ( +WITH _t7 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t8 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t8 + FROM _t7 ), _s1 AS ( SELECT l_source, @@ -26,33 +26,28 @@ WITH _t8 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t3 AS ( +), _t2 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t9 - ON _s2.anything_s_key = _t9.l_source - JOIN _t8 AS _s5 - ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + JOIN _s1 AS _t8 + ON _s2.anything_s_key = _t8.l_source + JOIN _t7 AS _s5 + ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL ) SELECT s_key AS key, ROUND(page_rank_0, 5) AS page_rank -FROM _t +FROM _t2 WHERE - _w = 1 + dummy_link ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_a2_sqlite.sql b/tests/test_sql_refsols/pagerank_a2_sqlite.sql index 32a7c2048..0b90f0fd8 100644 --- a/tests/test_sql_refsols/pagerank_a2_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a2_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t14 AS ( +WITH _t9 AS ( SELECT s_key FROM 
main.sites @@ -6,7 +6,7 @@ WITH _t14 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t14 + FROM _t9 ), _s1 AS ( SELECT l_source, @@ -26,74 +26,48 @@ WITH _t14 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t9 AS ( +), _t4 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t10.l_source <> _t10.l_target OR _t10.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t10.l_target IS NULL AND _t10.l_source = _t10.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t15 - ON _s2.anything_s_key = _t15.l_source - JOIN _t14 AS _s5 - ON _s5.s_key = _t15.l_target OR _t15.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + JOIN _s1 AS _t10 + ON _s2.anything_s_key = _t10.l_source + JOIN _t9 AS _s5 + ON _s5.s_key = _t10.l_target OR _t10.l_target IS NULL +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t16 - ON _s8.anything_s_key = _t16.l_source - JOIN _t14 AS _s11 - ON _s11.s_key = _t16.l_target OR _t16.l_target IS NULL -), _t_2 AS ( 
- SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t11.l_source <> _t11.l_target OR _t11.l_target IS NULL AS INTEGER) * _t4.page_rank_0 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_20, + NOT _t11.l_target IS NULL AND _t11.l_source = _t11.l_target AS dummy_link_18, + _s9.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t11 + ON _t11.l_source = _t4.s_key + JOIN _t9 AS _s9 + ON _s9.s_key = _t11.l_target OR _t11.l_target IS NULL + WHERE + _t4.dummy_link ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_2 + ROUND(page_rank_0_20, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_18 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_a6_sqlite.sql b/tests/test_sql_refsols/pagerank_a6_sqlite.sql index 9bdfb807c..24a887003 100644 --- a/tests/test_sql_refsols/pagerank_a6_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a6_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t38 AS ( +WITH _t17 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t38 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t38 + FROM _t17 ), _s1 AS ( SELECT l_source, @@ -26,254 +26,128 @@ WITH _t38 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t33 AS ( +), _t12 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t39.l_source <> _t39.l_target OR _t39.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t39 - ON _s2.anything_s_key = _t39.l_source - JOIN _t38 AS _s5 - ON _s5.s_key = _t39.l_target OR _t39.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t33 -), _s8 
AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t27 AS ( + JOIN _s1 AS _t18 + ON _s2.anything_s_key = _t18.l_source + JOIN _t17 AS _s5 + ON _s5.s_key = _t18.l_target OR _t18.l_target IS NULL +), _t10 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t12.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t40.l_source <> _t40.l_target OR _t40.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t40 - ON _s8.anything_s_key = _t40.l_source - JOIN _t38 AS _s11 - ON _s11.s_key = _t40.l_target OR _t40.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t27 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t12.page_rank_0 + ) AS REAL) / _t12.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_550, + _t12.anything_n, + NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_548, + _t12.n_out, + _s9.s_key + FROM _t12 AS _t12 + JOIN _s1 AS _t19 + ON _t12.s_key = _t19.l_source + JOIN _t17 AS _s9 + ON _s9.s_key = _t19.l_target OR _t19.l_target IS 
NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t21 AS ( + _t12.dummy_link +), _t8 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t10.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t41.l_source <> _t41.l_target OR _t41.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s14.anything_anything_anything_n, - _s17.s_key - FROM _s14 AS _s14 - JOIN _s1 AS _t41 - ON _s14.anything_s_key = _t41.l_source - JOIN _t38 AS _s17 - ON _s17.s_key = _t41.l_target OR _t41.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t21 -), _s20 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s19.l_target IS NULL, - _t.anything_anything_anything_n, - CAST(_s19.l_source <> _s19.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_3 AS _t - JOIN _s1 AS _s19 - ON _s19.l_source = _t.s_key + CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t10.page_rank_0_550 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_560, + _t10.anything_n, + NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_558, + _t10.n_out, + _s13.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t20 + ON _t10.s_key = _t20.l_source + JOIN _t17 AS _s13 + ON _s13.s_key = _t20.l_target OR _t20.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t15 AS ( + _t10.dummy_link_548 +), _t6 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t8.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t42.l_source <> _t42.l_target OR _t42.l_target IS NULL AS INTEGER) * _s20.page_rank - ) AS REAL) / 
_s20.n_out - ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, - _s20.anything_anything_anything_anything_n, - _s23.s_key - FROM _s20 AS _s20 - JOIN _s1 AS _t42 - ON _s20.anything_s_key = _t42.l_source - JOIN _t38 AS _s23 - ON _s23.s_key = _t42.l_target OR _t42.l_target IS NULL -), _t_4 AS ( - SELECT - page_rank_0, - anything_anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s26 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s25.l_target IS NULL, - _t.anything_anything_anything_anything_n, - CAST(_s25.l_source <> _s25.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_4 AS _t - JOIN _s1 AS _s25 - ON _s25.l_source = _t.s_key + CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _t8.page_rank_0_560 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_570, + _t8.anything_n, + NOT _t21.l_target IS NULL AND _t21.l_source = _t21.l_target AS dummy_link_568, + _t8.n_out, + _s17.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t21 + ON _t21.l_source = _t8.s_key + JOIN _t17 AS _s17 + ON _s17.s_key = _t21.l_target OR _t21.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + _t8.dummy_link_558 +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t43.l_source <> _t43.l_target OR _t43.l_target IS NULL AS INTEGER) * _s26.page_rank - ) AS REAL) / _s26.n_out - ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, - _s26.anything_anything_anything_anything_anything_n, - _s29.s_key - FROM _s26 AS _s26 - JOIN _s1 AS _t43 - ON _s26.anything_s_key = _t43.l_source - JOIN _t38 AS _s29 - ON _s29.s_key = _t43.l_target OR _t43.l_target IS NULL -), _t_5 AS ( - SELECT - 
page_rank_0, - anything_anything_anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s32 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s31.l_target IS NULL, - _t.anything_anything_anything_anything_anything_n, - CAST(_s31.l_source <> _s31.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_5 AS _t - JOIN _s1 AS _s31 - ON _s31.l_source = _t.s_key + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _t6.page_rank_0_570 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_580, + _t6.anything_n, + NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link_578, + _t6.n_out, + _s21.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t22 + ON _t22.l_source = _t6.s_key + JOIN _t17 AS _s21 + ON _s21.s_key = _t22.l_target OR _t22.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link_568 +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s32.anything_anything_anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t44.l_source <> _t44.l_target OR _t44.l_target IS NULL AS INTEGER) * _s32.page_rank - ) AS REAL) / _s32.n_out - ) OVER (PARTITION BY _s35.s_key) AS page_rank_0, - _s35.s_key - FROM _s32 AS _s32 - JOIN _s1 AS _t44 - ON _s32.anything_s_key = _t44.l_source - JOIN _t38 AS _s35 - ON _s35.s_key = _t44.l_target OR _t44.l_target IS NULL -), _t_6 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t4.page_rank_0_580 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s25.s_key) AS page_rank_0_590, + NOT _t23.l_target IS NULL AND 
_t23.l_source = _t23.l_target AS dummy_link_588, + _s25.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t23 + ON _t23.l_source = _t4.s_key + JOIN _t17 AS _s25 + ON _s25.s_key = _t23.l_target OR _t23.l_target IS NULL + WHERE + _t4.dummy_link_578 ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_6 + ROUND(page_rank_0_590, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_588 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_b1_sqlite.sql b/tests/test_sql_refsols/pagerank_b1_sqlite.sql index 549289485..d7965056f 100644 --- a/tests/test_sql_refsols/pagerank_b1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_b1_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t8 AS ( +WITH _t7 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t8 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t8 + FROM _t7 ), _s1 AS ( SELECT l_source, @@ -26,33 +26,28 @@ WITH _t8 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t3 AS ( +), _t2 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t9 - ON _s2.anything_s_key = _t9.l_source - JOIN _t8 AS _s5 - ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + JOIN _s1 AS _t8 + ON _s2.anything_s_key = _t8.l_source + JOIN _t7 AS _s5 + ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL ) SELECT s_key AS key, ROUND(page_rank_0, 5) AS page_rank -FROM _t +FROM _t2 WHERE - _w = 1 + dummy_link ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql index 
00086a1c7..40404439e 100644 --- a/tests/test_sql_refsols/pagerank_b3_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t20 AS ( +WITH _t11 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t20 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t20 + FROM _t11 ), _s1 AS ( SELECT l_source, @@ -26,119 +26,68 @@ WITH _t20 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t15 AS ( +), _t6 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t21 - ON _s2.anything_s_key = _t21.l_source - JOIN _t20 AS _s5 - ON _s5.s_key = _t21.l_target OR _t21.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + JOIN _s1 AS _t12 + ON _s2.anything_s_key = _t12.l_source + JOIN _t11 AS _s5 + ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY 
_s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t22 - ON _s8.anything_s_key = _t22.l_source - JOIN _t20 AS _s11 - ON _s11.s_key = _t22.l_target OR _t22.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, + _t6.anything_n, + NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, + _t6.n_out, + _s9.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t13 + ON _t13.l_source = _t6.s_key + JOIN _t11 AS _s9 + ON _s9.s_key = _t13.l_target OR _t13.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s17.s_key - FROM _s14 AS _s14 - JOIN _s1 AS _t23 - ON _s14.anything_s_key = _t23.l_source - JOIN _t20 AS _s17 - ON _s17.s_key = _t23.l_target OR _t23.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 + ) AS REAL) / _t4.n_out 
+ ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_58, + NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link_56, + _s13.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t14 + ON _t14.l_source = _t4.s_key + JOIN _t11 AS _s13 + ON _s13.s_key = _t14.l_target OR _t14.l_target IS NULL + WHERE + _t4.dummy_link_46 ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_3 + ROUND(page_rank_0_58, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_56 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_c4_sqlite.sql b/tests/test_sql_refsols/pagerank_c4_sqlite.sql index 591e82f3d..e0c215a16 100644 --- a/tests/test_sql_refsols/pagerank_c4_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_c4_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t26 AS ( +WITH _t13 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t26 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t26 + FROM _t13 ), _s1 AS ( SELECT l_source, @@ -26,164 +26,88 @@ WITH _t26 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t21 AS ( +), _t8 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t27 - ON _s2.anything_s_key = _t27.l_source - JOIN _t26 AS _s5 - ON _s5.s_key = _t27.l_target OR _t27.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t21 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS 
anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t15 AS ( + JOIN _s1 AS _t14 + ON _s2.anything_s_key = _t14.l_source + JOIN _t13 AS _s5 + ON _s5.s_key = _t14.l_target OR _t14.l_target IS NULL +), _t6 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t8.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t28 - ON _s8.anything_s_key = _t28.l_source - JOIN _t26 AS _s11 - ON _s11.s_key = _t28.l_target OR _t28.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _t8.page_rank_0 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_114, + _t8.anything_n, + NOT _t15.l_target IS NULL AND _t15.l_source = _t15.l_target AS dummy_link_112, + _t8.n_out, + _s9.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t15 + ON _t15.l_source = _t8.s_key + JOIN _t13 AS _s9 + ON _s9.s_key = _t15.l_target OR _t15.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + _t8.dummy_link +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / 
_t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s14.anything_anything_anything_n, - _s17.s_key - FROM _s14 AS _s14 - JOIN _s1 AS _t29 - ON _s14.anything_s_key = _t29.l_source - JOIN _t26 AS _s17 - ON _s17.s_key = _t29.l_target OR _t29.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s20 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s19.l_target IS NULL, - _t.anything_anything_anything_n, - CAST(_s19.l_source <> _s19.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_3 AS _t - JOIN _s1 AS _s19 - ON _s19.l_source = _t.s_key + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _t6.page_rank_0_114 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_124, + _t6.anything_n, + NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link_122, + _t6.n_out, + _s13.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t16 + ON _t16.l_source = _t6.s_key + JOIN _t13 AS _s13 + ON _s13.s_key = _t16.l_target OR _t16.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link_112 +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t30.l_source <> _t30.l_target OR _t30.l_target IS NULL AS INTEGER) * _s20.page_rank - ) AS REAL) / _s20.n_out - ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, - _s23.s_key - FROM _s20 AS _s20 - JOIN _s1 AS _t30 - ON _s20.anything_s_key = _t30.l_source - JOIN _t26 AS _s23 - ON _s23.s_key = _t30.l_target OR 
_t30.l_target IS NULL -), _t_4 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t4.page_rank_0_124 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_134, + NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_132, + _s17.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t17 + ON _t17.l_source = _t4.s_key + JOIN _t13 AS _s17 + ON _s17.s_key = _t17.l_target OR _t17.l_target IS NULL + WHERE + _t4.dummy_link_122 ) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_4 + ROUND(page_rank_0_134, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_132 ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_d1_sqlite.sql b/tests/test_sql_refsols/pagerank_d1_sqlite.sql index 549289485..d7965056f 100644 --- a/tests/test_sql_refsols/pagerank_d1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_d1_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t8 AS ( +WITH _t7 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t8 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t8 + FROM _t7 ), _s1 AS ( SELECT l_source, @@ -26,33 +26,28 @@ WITH _t8 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t3 AS ( +), _t2 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t9.l_source <> _t9.l_target OR _t9.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t9 - ON _s2.anything_s_key = _t9.l_source - JOIN _t8 AS _s5 - ON _s5.s_key = _t9.l_target OR _t9.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + JOIN _s1 AS _t8 + ON 
_s2.anything_s_key = _t8.l_source + JOIN _t7 AS _s5 + ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL ) SELECT s_key AS key, ROUND(page_rank_0, 5) AS page_rank -FROM _t +FROM _t2 WHERE - _w = 1 + dummy_link ORDER BY s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql index 09af3379b..f6f5cb16e 100644 --- a/tests/test_sql_refsols/pagerank_d5_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t32 AS ( +WITH _t15 AS ( SELECT s_key FROM main.sites @@ -6,7 +6,7 @@ WITH _t32 AS ( SELECT COUNT(*) OVER () AS n, s_key - FROM _t32 + FROM _t15 ), _s1 AS ( SELECT l_source, @@ -26,209 +26,108 @@ WITH _t32 AS ( ON _s0.s_key = _s1.l_source GROUP BY _s0.s_key -), _t27 AS ( +), _t10 AS ( SELECT ( CAST(0.15000000000000002 AS REAL) / _s2.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t33.l_source <> _t33.l_target OR _t33.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, _s2.anything_n, + NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, + _s2.n_out, _s5.s_key FROM _s2 AS _s2 - JOIN _s1 AS _t33 - ON _s2.anything_s_key = _t33.l_source - JOIN _t32 AS _s5 - ON _s5.s_key = _t33.l_target OR _t33.l_target IS NULL -), _t AS ( - SELECT - page_rank_0, - anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t27 -), _s8 AS ( - SELECT - COALESCE( - SUM( - IIF(_s7.l_target IS NULL, _t.anything_n, CAST(_s7.l_source <> _s7.l_target AS INTEGER)) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_n) AS anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t AS _t - JOIN _s1 AS _s7 - ON _s7.l_source = _t.s_key - WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t21 AS ( + JOIN _s1 AS _t16 + ON _s2.anything_s_key = _t16.l_source + JOIN _t15 AS _s5 + ON _s5.s_key = 
_t16.l_target OR _t16.l_target IS NULL +), _t8 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s8.anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t10.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t34.l_source <> _t34.l_target OR _t34.l_target IS NULL AS INTEGER) * _s8.page_rank - ) AS REAL) / _s8.n_out - ) OVER (PARTITION BY _s11.s_key) AS page_rank_0, - _s8.anything_anything_n, - _s11.s_key - FROM _s8 AS _s8 - JOIN _s1 AS _t34 - ON _s8.anything_s_key = _t34.l_source - JOIN _t32 AS _s11 - ON _s11.s_key = _t34.l_target OR _t34.l_target IS NULL -), _t_2 AS ( - SELECT - page_rank_0, - anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t21 -), _s14 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s13.l_target IS NULL, - _t.anything_anything_n, - CAST(_s13.l_source <> _s13.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_n) AS anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_2 AS _t - JOIN _s1 AS _s13 - ON _s13.l_source = _t.s_key + CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, + _t10.anything_n, + NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, + _t10.n_out, + _s9.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t17 + ON _t10.s_key = _t17.l_source + JOIN _t15 AS _s9 + ON _s9.s_key = _t17.l_target OR _t17.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t15 AS ( + _t10.dummy_link +), _t6 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s14.anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t8.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t35.l_source <> _t35.l_target OR _t35.l_target IS NULL AS INTEGER) * _s14.page_rank - ) AS REAL) / _s14.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0, - _s14.anything_anything_anything_n, - _s17.s_key - FROM 
_s14 AS _s14 - JOIN _s1 AS _t35 - ON _s14.anything_s_key = _t35.l_source - JOIN _t32 AS _s17 - ON _s17.s_key = _t35.l_target OR _t35.l_target IS NULL -), _t_3 AS ( - SELECT - page_rank_0, - anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t15 -), _s20 AS ( - SELECT - COALESCE( - SUM( - IIF( - _s19.l_target IS NULL, - _t.anything_anything_anything_n, - CAST(_s19.l_source <> _s19.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_n) AS anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_3 AS _t - JOIN _s1 AS _s19 - ON _s19.l_source = _t.s_key + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, + _t8.anything_n, + NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, + _t8.n_out, + _s13.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t18 + ON _t18.l_source = _t8.s_key + JOIN _t15 AS _s13 + ON _s13.s_key = _t18.l_target OR _t18.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t9 AS ( + _t8.dummy_link_254 +), _t4 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s20.anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t6.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t36.l_source <> _t36.l_target OR _t36.l_target IS NULL AS INTEGER) * _s20.page_rank - ) AS REAL) / _s20.n_out - ) OVER (PARTITION BY _s23.s_key) AS page_rank_0, - _s20.anything_anything_anything_anything_n, - _s23.s_key - FROM _s20 AS _s20 - JOIN _s1 AS _t36 - ON _s20.anything_s_key = _t36.l_source - JOIN _t32 AS _s23 - ON _s23.s_key = _t36.l_target OR _t36.l_target IS NULL -), _t_4 AS ( - SELECT - page_rank_0, - anything_anything_anything_anything_n, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t9 -), _s26 AS ( - SELECT - COALESCE( - SUM( - 
IIF( - _s25.l_target IS NULL, - _t.anything_anything_anything_anything_n, - CAST(_s25.l_source <> _s25.l_target AS INTEGER) - ) - ), - 0 - ) AS n_out, - MAX(_t.page_rank_0) AS page_rank, - MAX(_t.anything_anything_anything_anything_n) AS anything_anything_anything_anything_anything_n, - MAX(_t.s_key) AS anything_s_key - FROM _t_4 AS _t - JOIN _s1 AS _s25 - ON _s25.l_source = _t.s_key + CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, + _t6.anything_n, + NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, + _t6.n_out, + _s17.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t19 + ON _t19.l_source = _t6.s_key + JOIN _t15 AS _s17 + ON _s17.s_key = _t19.l_target OR _t19.l_target IS NULL WHERE - _t._w = 1 - GROUP BY - _t.s_key -), _t3 AS ( + _t6.dummy_link_264 +), _t2 AS ( SELECT ( - CAST(0.15000000000000002 AS REAL) / _s26.anything_anything_anything_anything_anything_n + CAST(0.15000000000000002 AS REAL) / _t4.anything_n ) + 0.85 * SUM( CAST(( - CAST(_t37.l_source <> _t37.l_target OR _t37.l_target IS NULL AS INTEGER) * _s26.page_rank - ) AS REAL) / _s26.n_out - ) OVER (PARTITION BY _s29.s_key) AS page_rank_0, - _s29.s_key - FROM _s26 AS _s26 - JOIN _s1 AS _t37 - ON _s26.anything_s_key = _t37.l_source - JOIN _t32 AS _s29 - ON _s29.s_key = _t37.l_target OR _t37.l_target IS NULL -), _t_5 AS ( - SELECT - page_rank_0, - s_key, - ROW_NUMBER() OVER (PARTITION BY s_key ORDER BY s_key) AS _w - FROM _t3 + CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_286, + NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_284, + _s21.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t20 + ON _t20.l_source = _t4.s_key + JOIN _t15 AS _s21 + ON _s21.s_key = _t20.l_target OR _t20.l_target IS NULL + WHERE + _t4.dummy_link_274 
) SELECT s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t_5 + ROUND(page_rank_0_286, 5) AS page_rank +FROM _t2 WHERE - _w = 1 + dummy_link_284 ORDER BY s_key From 44fdd3336fc59c664bb1eb0167474b36f193b872 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 16:16:12 -0400 Subject: [PATCH 32/97] [RUN CI] From b5d90f25e8d78685626efbc0493cec8136511cbf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 16 Jul 2025 16:17:02 -0400 Subject: [PATCH 33/97] [RUN CI] From 0128758695d8aaca07b70215a624232916ba55d5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 12:14:54 -0400 Subject: [PATCH 34/97] Added tests e/f, deleted relational/sql tests for graphs other than a/c --- tests/conftest.py | 2 + tests/test_pipeline_pagerank.py | 50 ++++++- tests/test_plan_refsols/pagerank_b0.txt | 7 - tests/test_plan_refsols/pagerank_b1.txt | 16 --- tests/test_plan_refsols/pagerank_b3.txt | 30 ---- tests/test_plan_refsols/pagerank_d1.txt | 16 --- tests/test_plan_refsols/pagerank_d5.txt | 44 ------ tests/test_sql_refsols/pagerank_b0_sqlite.sql | 16 --- tests/test_sql_refsols/pagerank_b1_sqlite.sql | 53 ------- tests/test_sql_refsols/pagerank_b3_sqlite.sql | 93 ------------ tests/test_sql_refsols/pagerank_d1_sqlite.sql | 53 ------- tests/test_sql_refsols/pagerank_d5_sqlite.sql | 133 ------------------ 12 files changed, 49 insertions(+), 464 deletions(-) delete mode 100644 tests/test_plan_refsols/pagerank_b0.txt delete mode 100644 tests/test_plan_refsols/pagerank_b1.txt delete mode 100644 tests/test_plan_refsols/pagerank_b3.txt delete mode 100644 tests/test_plan_refsols/pagerank_d1.txt delete mode 100644 tests/test_plan_refsols/pagerank_d5.txt delete mode 100644 tests/test_sql_refsols/pagerank_b0_sqlite.sql delete mode 100644 tests/test_sql_refsols/pagerank_b1_sqlite.sql delete mode 100644 tests/test_sql_refsols/pagerank_b3_sqlite.sql delete mode 100644 tests/test_sql_refsols/pagerank_d1_sqlite.sql delete mode 100644 
tests/test_sql_refsols/pagerank_d5_sqlite.sql diff --git a/tests/conftest.py b/tests/conftest.py index ec28ed893..1bbe51c09 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -513,6 +513,8 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: (15, 2), ], ), + ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]), + ("PAGERANK_F", 100, []), ] # Setup each of the the pagerank databases using the configurations. diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index c8a846e5f..f55c53eec 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -219,6 +219,38 @@ ), id="pagerank_d5", ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_E", + lambda: pd.DataFrame( + { + "key": [1, 2, 3, 4, 5], + "page_rank": [0.2] * 5, + } + ), + "pagerank_e1", + order_sensitive=True, + args=[1], + ), + id="pagerank_e1", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_F", + lambda: pd.DataFrame( + { + "key": list(range(1, 101)), + "page_rank": [0.01] * 100, + } + ), + "pagerank_f2", + order_sensitive=True, + args=[2], + ), + id="pagerank_f2", + ), ], ) def pagerank_pipeline_test_data(request) -> PyDoughPandasTest: @@ -238,7 +270,13 @@ def test_pipeline_until_relational_pagerank( ) -> None: """ Verifies the generated relational plans for the pagerank tests. + Only runs the tests with the `PAGERANK_A`/`PAGERANK_C` graphs, + since the others are essentially duplicates of the plans. 
""" + if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): + pytest.skip( + "Skipping relational plan test for graphs other than PAGERANK_A or PAGERANK_C" + ) file_path: str = get_plan_test_filename(pagerank_pipeline_test_data.test_name) pagerank_pipeline_test_data.run_relational_test( get_pagerank_graph, file_path, update_tests @@ -253,9 +291,14 @@ def test_pipeline_until_sql_pagerank( update_tests: bool, ) -> None: """ - Verifies the generated SQL for the pagerank tests. The outputs were - generated using this website: https://pagerank-visualizer.netlify.app/. + Verifies the generated SQL for the pagerank tests. Only runs the tests with + the `PAGERANK_A`/`PAGERANK_C` graphs, since the others are essentially + duplicates of the generated SQL. """ + if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): + pytest.skip( + "Skipping sql query test for graphs other than PAGERANK_A or PAGERANK_C" + ) ctx: DatabaseContext = sqlite_pagerank_db_contexts[ pagerank_pipeline_test_data.graph_name ] @@ -274,7 +317,8 @@ def test_pipeline_e2e_pagerank( sqlite_pagerank_db_contexts: dict[str, DatabaseContext], ): """ - Verifies the final output answer for the pagerank tests. + Verifies the final output answer for the pagerank tests. The outputs were + generated using this website: https://pagerank-visualizer.netlify.app/.
""" pagerank_pipeline_test_data.run_e2e_test( get_pagerank_graph, diff --git a/tests/test_plan_refsols/pagerank_b0.txt b/tests/test_plan_refsols/pagerank_b0.txt deleted file mode 100644 index 89355a8da..000000000 --- a/tests/test_plan_refsols/pagerank_b0.txt +++ /dev/null @@ -1,7 +0,0 @@ -ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) - PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) - PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source}) diff --git a/tests/test_plan_refsols/pagerank_b1.txt b/tests/test_plan_refsols/pagerank_b1.txt deleted file mode 100644 index b3053e78b..000000000 --- a/tests/test_plan_refsols/pagerank_b1.txt +++ /dev/null @@ -1,16 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt deleted file mode 100644 index d78322a4d..000000000 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ /dev/null @@ -1,30 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) - PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / 
n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) - FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_47': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_46': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - 
PROJECT(columns={'consider_link_57': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_56': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d1.txt b/tests/test_plan_refsols/pagerank_d1.txt deleted file mode 100644 index b3053e78b..000000000 --- a/tests/test_plan_refsols/pagerank_d1.txt +++ /dev/null @@ -1,16 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key 
== t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt deleted file mode 100644 index fb3b3c239..000000000 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ /dev/null @@ -1,44 +0,0 @@ -ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) - PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) - FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) - PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) - 
FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) - FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) - 
FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': 
DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_255': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_254': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_265': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_264': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - PROJECT(columns={'consider_link_275': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_274': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, 
columns={'s_key': s_key}) - PROJECT(columns={'consider_link_285': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_284': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) - SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_sql_refsols/pagerank_b0_sqlite.sql b/tests/test_sql_refsols/pagerank_b0_sqlite.sql deleted file mode 100644 index 4fe8f22be..000000000 --- a/tests/test_sql_refsols/pagerank_b0_sqlite.sql +++ /dev/null @@ -1,16 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) OVER () AS agg_2, - s_key - FROM main.sites -) -SELECT - MAX(_s0.s_key) AS key, - ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank -FROM _s0 AS _s0 -JOIN main.links AS links - ON _s0.s_key = links.l_source -GROUP BY - _s0.s_key -ORDER BY - MAX(_s0.s_key) diff --git a/tests/test_sql_refsols/pagerank_b1_sqlite.sql b/tests/test_sql_refsols/pagerank_b1_sqlite.sql deleted file mode 100644 index d7965056f..000000000 --- a/tests/test_sql_refsols/pagerank_b1_sqlite.sql +++ /dev/null @@ -1,53 +0,0 @@ -WITH _t7 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t7 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, - 
_s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t8 - ON _s2.anything_s_key = _t8.l_source - JOIN _t7 AS _s5 - ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL -) -SELECT - s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t2 -WHERE - dummy_link -ORDER BY - s_key diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql deleted file mode 100644 index 40404439e..000000000 --- a/tests/test_sql_refsols/pagerank_b3_sqlite.sql +++ /dev/null @@ -1,93 +0,0 @@ -WITH _t11 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t11 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t6 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, - NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, - _s2.n_out, - _s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t12 - ON _s2.anything_s_key = _t12.l_source - JOIN _t11 AS _s5 - ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL -), _t4 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 - ) AS REAL) / _t6.n_out - ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, - _t6.anything_n, - NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, - _t6.n_out, - _s9.s_key - FROM _t6 AS _t6 - JOIN 
_s1 AS _t13 - ON _t13.l_source = _t6.s_key - JOIN _t11 AS _s9 - ON _s9.s_key = _t13.l_target OR _t13.l_target IS NULL - WHERE - _t6.dummy_link -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 - ) AS REAL) / _t4.n_out - ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_58, - NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link_56, - _s13.s_key - FROM _t4 AS _t4 - JOIN _s1 AS _t14 - ON _t14.l_source = _t4.s_key - JOIN _t11 AS _s13 - ON _s13.s_key = _t14.l_target OR _t14.l_target IS NULL - WHERE - _t4.dummy_link_46 -) -SELECT - s_key AS key, - ROUND(page_rank_0_58, 5) AS page_rank -FROM _t2 -WHERE - dummy_link_56 -ORDER BY - s_key diff --git a/tests/test_sql_refsols/pagerank_d1_sqlite.sql b/tests/test_sql_refsols/pagerank_d1_sqlite.sql deleted file mode 100644 index d7965056f..000000000 --- a/tests/test_sql_refsols/pagerank_d1_sqlite.sql +++ /dev/null @@ -1,53 +0,0 @@ -WITH _t7 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t7 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, - _s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t8 - ON _s2.anything_s_key = _t8.l_source - 
JOIN _t7 AS _s5 - ON _s5.s_key = _t8.l_target OR _t8.l_target IS NULL -) -SELECT - s_key AS key, - ROUND(page_rank_0, 5) AS page_rank -FROM _t2 -WHERE - dummy_link -ORDER BY - s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql deleted file mode 100644 index f6f5cb16e..000000000 --- a/tests/test_sql_refsols/pagerank_d5_sqlite.sql +++ /dev/null @@ -1,133 +0,0 @@ -WITH _t15 AS ( - SELECT - s_key - FROM main.sites -), _s0 AS ( - SELECT - COUNT(*) OVER () AS n, - s_key - FROM _t15 -), _s1 AS ( - SELECT - l_source, - l_target - FROM main.links -), _s2 AS ( - SELECT - COALESCE( - SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), - 0 - ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, - MAX(_s0.s_key) AS anything_s_key - FROM _s0 AS _s0 - JOIN _s1 AS _s1 - ON _s0.s_key = _s1.l_source - GROUP BY - _s0.s_key -), _t10 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank - ) AS REAL) / _s2.n_out - ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, - NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, - _s2.n_out, - _s5.s_key - FROM _s2 AS _s2 - JOIN _s1 AS _t16 - ON _s2.anything_s_key = _t16.l_source - JOIN _t15 AS _s5 - ON _s5.s_key = _t16.l_target OR _t16.l_target IS NULL -), _t8 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 - ) AS REAL) / _t10.n_out - ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, - _t10.anything_n, - NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, - _t10.n_out, - _s9.s_key - FROM _t10 AS _t10 - JOIN _s1 AS _t17 - ON _t10.s_key = _t17.l_source - JOIN _t15 AS _s9 - ON 
_s9.s_key = _t17.l_target OR _t17.l_target IS NULL - WHERE - _t10.dummy_link -), _t6 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 - ) AS REAL) / _t8.n_out - ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, - _t8.anything_n, - NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, - _t8.n_out, - _s13.s_key - FROM _t8 AS _t8 - JOIN _s1 AS _t18 - ON _t18.l_source = _t8.s_key - JOIN _t15 AS _s13 - ON _s13.s_key = _t18.l_target OR _t18.l_target IS NULL - WHERE - _t8.dummy_link_254 -), _t4 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 - ) AS REAL) / _t6.n_out - ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, - _t6.anything_n, - NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, - _t6.n_out, - _s17.s_key - FROM _t6 AS _t6 - JOIN _s1 AS _t19 - ON _t19.l_source = _t6.s_key - JOIN _t15 AS _s17 - ON _s17.s_key = _t19.l_target OR _t19.l_target IS NULL - WHERE - _t6.dummy_link_264 -), _t2 AS ( - SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( - CAST(( - CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 - ) AS REAL) / _t4.n_out - ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_286, - NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_284, - _s21.s_key - FROM _t4 AS _t4 - JOIN _s1 AS _t20 - ON _t20.l_source = _t4.s_key - JOIN _t15 AS _s21 - ON _s21.s_key = _t20.l_target OR _t20.l_target IS NULL - WHERE - _t4.dummy_link_274 -) -SELECT - s_key AS key, - ROUND(page_rank_0_286, 5) AS page_rank -FROM _t2 -WHERE - dummy_link_284 -ORDER BY - s_key From c9a5fe1f3c0df6b710c86c0e81734d5da588ce1a Mon Sep 17 00:00:00 2001 From: 
knassre-bodo Date: Thu, 17 Jul 2025 12:19:26 -0400 Subject: [PATCH 35/97] Adjusted how the skips are handled --- tests/test_pipeline_pagerank.py | 24 ++-- tests/test_plan_refsols/pagerank_b3.txt | 30 ++++ tests/test_plan_refsols/pagerank_d5.txt | 44 ++++++ tests/test_sql_refsols/pagerank_b3_sqlite.sql | 93 ++++++++++++ tests/test_sql_refsols/pagerank_d5_sqlite.sql | 133 ++++++++++++++++++ tests/testing_utilities.py | 22 +++ 6 files changed, 333 insertions(+), 13 deletions(-) create mode 100644 tests/test_plan_refsols/pagerank_b3.txt create mode 100644 tests/test_plan_refsols/pagerank_d5.txt create mode 100644 tests/test_sql_refsols/pagerank_b3_sqlite.sql create mode 100644 tests/test_sql_refsols/pagerank_d5_sqlite.sql diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index f55c53eec..56c948395 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -91,6 +91,8 @@ } ), "pagerank_b0", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[0], ), @@ -107,6 +109,8 @@ } ), "pagerank_b1", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[1], ), @@ -181,6 +185,8 @@ } ), "pagerank_d1", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[1], ), @@ -230,6 +236,8 @@ } ), "pagerank_e1", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[1], ), @@ -246,6 +254,8 @@ } ), "pagerank_f2", + skip_relational=True, + skip_sql=True, order_sensitive=True, args=[2], ), @@ -270,13 +280,7 @@ def test_pipeline_until_relational_pagerank( ) -> None: """ Verifies the generated relational plans for the pagerank tests. - Only runs the tests with the `PAGERANK_A`/`PAGERANK_C` graphs, - since the others are essentially duplicates of the plans. 
""" - if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): - pytest.skip( - "Skipping relational plan test for graphs other than PAGERANK_A or PAGERANK_C" - ) file_path: str = get_plan_test_filename(pagerank_pipeline_test_data.test_name) pagerank_pipeline_test_data.run_relational_test( get_pagerank_graph, file_path, update_tests @@ -291,14 +295,8 @@ def test_pipeline_until_sql_pagerank( update_tests: bool, ) -> None: """ - Verifies the generated SQL for the pagerank tests. Only runs the tests with - the `PAGERANK_A`/`PAGE_RANK_C` graphs, since the others are essentially - duplicates of the generated SQL. + Verifies the generated SQL for the pagerank tests. """ - if pagerank_pipeline_test_data.graph_name not in ("PAGERANK_A", "PAGERANK_C"): - pytest.skip( - "Skipping sql query test for graphs other than PAGERANK_A or PAGERANK_C" - ) ctx: DatabaseContext = sqlite_pagerank_db_contexts[ pagerank_pipeline_test_data.graph_name ] diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt new file mode 100644 index 000000000..d78322a4d --- /dev/null +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -0,0 +1,30 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) + PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) + FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_47': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_46': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_57': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_56': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt 
b/tests/test_plan_refsols/pagerank_d5.txt new file mode 100644 index 000000000..fb3b3c239 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -0,0 +1,44 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) + PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) + FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) + FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) + FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + 
PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_255': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_254': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_265': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_264': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_275': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_274': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_285': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_284': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql new file mode 100644 index 000000000..40404439e --- /dev/null +++ b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -0,0 +1,93 @@ +WITH _t11 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t11 +), _s1 AS ( + SELECT + 
l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t6 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, + _s2.n_out, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t12 + ON _s2.anything_s_key = _t12.l_source + JOIN _t11 AS _s5 + ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL +), _t4 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t6.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, + _t6.anything_n, + NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, + _t6.n_out, + _s9.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t13 + ON _t13.l_source = _t6.s_key + JOIN _t11 AS _s9 + ON _s9.s_key = _t13.l_target OR _t13.l_target IS NULL + WHERE + _t6.dummy_link +), _t2 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t4.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_58, + NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link_56, + _s13.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t14 + ON _t14.l_source = _t4.s_key + JOIN _t11 AS _s13 + ON _s13.s_key = _t14.l_target OR 
_t14.l_target IS NULL + WHERE + _t4.dummy_link_46 +) +SELECT + s_key AS key, + ROUND(page_rank_0_58, 5) AS page_rank +FROM _t2 +WHERE + dummy_link_56 +ORDER BY + s_key diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql new file mode 100644 index 000000000..f6f5cb16e --- /dev/null +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -0,0 +1,133 @@ +WITH _t15 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t15 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t10 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, + _s2.n_out, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t16 + ON _s2.anything_s_key = _t16.l_source + JOIN _t15 AS _s5 + ON _s5.s_key = _t16.l_target OR _t16.l_target IS NULL +), _t8 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t10.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, + _t10.anything_n, + NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, + _t10.n_out, + _s9.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t17 + ON _t10.s_key = _t17.l_source + JOIN _t15 AS _s9 + ON _s9.s_key = _t17.l_target 
OR _t17.l_target IS NULL + WHERE + _t10.dummy_link +), _t6 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t8.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, + _t8.anything_n, + NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, + _t8.n_out, + _s13.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t18 + ON _t18.l_source = _t8.s_key + JOIN _t15 AS _s13 + ON _s13.s_key = _t18.l_target OR _t18.l_target IS NULL + WHERE + _t8.dummy_link_254 +), _t4 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t6.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, + _t6.anything_n, + NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, + _t6.n_out, + _s17.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t19 + ON _t19.l_source = _t6.s_key + JOIN _t15 AS _s17 + ON _s17.s_key = _t19.l_target OR _t19.l_target IS NULL + WHERE + _t6.dummy_link_264 +), _t2 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t4.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_286, + NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_284, + _s21.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t20 + ON _t20.l_source = _t4.s_key + JOIN _t15 AS _s21 + ON _s21.s_key = _t20.l_target OR _t20.l_target IS NULL + WHERE + _t4.dummy_link_274 +) +SELECT + s_key AS key, + ROUND(page_rank_0_286, 5) AS page_rank +FROM _t2 +WHERE + dummy_link_284 +ORDER BY + s_key diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d95d35562..d2c01690a 100644 --- 
a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1043,6 +1043,10 @@ class PyDoughPandasTest: in the output and just use the same column names as in the reference solution. - `args` (optional): additional arguments to pass to the PyDough function. + - `skip_relational`: (optional): if True, does not run the test as part of + relational plan testing. Default is False. + - `skip_sql`: (optional): if True, does not run the test as part of SQL + testing. Default is False. """ pydough_function: Callable[..., UnqualifiedNode] @@ -1090,6 +1094,16 @@ class PyDoughPandasTest: executing it. If None, no additional arguments are passed. """ + skip_relational: bool = False + """ + If True, does not run the test as part of relational plan testing. + """ + + skip_sql: bool = False + """ + If True, does not run the test as part of SQL testing. + """ + def run_relational_test( self, fetcher: graph_fetcher, @@ -1111,6 +1125,10 @@ def run_relational_test( against the expected relational plan text in the file. `config`: The PyDough configuration to use for the test, if any. """ + # Skip if indicated. + if self.skip_relational: + pytest.skip(f"Skipping relational plan test for {self.test_name!r}") + # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( @@ -1165,6 +1183,10 @@ def run_sql_test( to use when generating the SQL test. `config`: The PyDough configuration to use for the test, if any. """ + # Skip if indicated. 
+ if self.skip_sql: + pytest.skip(f"Skipping SQL text test for {self.test_name!r}") + # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( From d6ed6c65d8c66153492e20595651941b0edd5af8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 12:59:30 -0400 Subject: [PATCH 36/97] Adding larger graph & dense graph tests [RUN CI] --- tests/conftest.py | 20 +++++++ tests/test_pipeline_pagerank.py | 95 +++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 1bbe51c09..21b362b2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -515,6 +515,26 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ), ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]), ("PAGERANK_F", 100, []), + ( + "PAGERANK_G", + 1000, + [ + (j + 1, i + 1) + for i in range(1000) + for j in range(i + 1, 1000) + if str(i) in str(j) + ], + ), + ( + "PAGERANK_H", + 50, + [ + (i, j) + for i in range(1, 51) + for j in range(1, 51) + if i != j and (i < j or i % j == 0) + ], + ), ] # Setup each of the the pagerank databases using the configurations. 
diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 56c948395..3385d7199 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -261,6 +261,101 @@ ), id="pagerank_f2", ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_G", + lambda: pd.DataFrame( + { + "key": list(range(1, 1001)), + "page_rank": [0.02471] + + [0.03806] * 9 + + [ + 0.00225 + if i % 10 == 0 + else (0.00234 if len(set(str(i))) == 1 else 0.00205) + for i in range(10, 100) + ] + + [0.00049] * 900, + } + ), + "pagerank_g5", + skip_relational=True, + skip_sql=True, + order_sensitive=True, + args=[5], + ), + id="pagerank_g5", + ), + pytest.param( + PyDoughPandasTest( + pagerank, + "PAGERANK_H", + lambda: pd.DataFrame( + { + "key": list(range(1, 51)), + "page_rank": [ + 0.07399, + 0.03496, + 0.0178, + 0.01513, + 0.02271, + 0.01277, + 0.02365, + 0.01234, + 0.01065, + 0.01941, + 0.01044, + 0.01207, + 0.00915, + 0.00958, + 0.01116, + 0.01242, + 0.00911, + 0.00929, + 0.00993, + 0.01019, + 0.01077, + 0.01195, + 0.01426, + 0.01403, + 0.02148, + 0.01121, + 0.01154, + 0.0119, + 0.01227, + 0.01273, + 0.01313, + 0.01369, + 0.0142, + 0.01482, + 0.01551, + 0.01628, + 0.01694, + 0.01804, + 0.01914, + 0.0204, + 0.02152, + 0.02352, + 0.02499, + 0.02791, + 0.03029, + 0.0331, + 0.03748, + 0.04604, + 0.04977, + 0.06437, + ], + } + ), + "pagerank_h3", + skip_relational=True, + skip_sql=True, + order_sensitive=True, + args=[3], + ), + id="pagerank_h3", + ), ], ) def pagerank_pipeline_test_data(request) -> PyDoughPandasTest: From ebf5339193f93d883c20b8f1a2783eca95cf3cb4 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 13:02:26 -0400 Subject: [PATCH 37/97] Changing test h to be a higher number of iterations [RUN CI] --- tests/test_pipeline_pagerank.py | 108 +++++----- tests/test_plan_refsols/pagerank_h8.txt | 65 ++++++ tests/test_sql_refsols/pagerank_h8_sqlite.sql | 193 ++++++++++++++++++ 3 files changed, 311 insertions(+), 55 
deletions(-) create mode 100644 tests/test_plan_refsols/pagerank_h8.txt create mode 100644 tests/test_sql_refsols/pagerank_h8_sqlite.sql diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 3385d7199..8d37255a6 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -295,66 +295,64 @@ { "key": list(range(1, 51)), "page_rank": [ - 0.07399, - 0.03496, - 0.0178, - 0.01513, - 0.02271, - 0.01277, - 0.02365, - 0.01234, - 0.01065, - 0.01941, - 0.01044, - 0.01207, - 0.00915, - 0.00958, - 0.01116, - 0.01242, - 0.00911, - 0.00929, - 0.00993, - 0.01019, - 0.01077, - 0.01195, - 0.01426, - 0.01403, - 0.02148, - 0.01121, - 0.01154, - 0.0119, - 0.01227, - 0.01273, - 0.01313, - 0.01369, - 0.0142, - 0.01482, - 0.01551, - 0.01628, - 0.01694, - 0.01804, - 0.01914, - 0.0204, - 0.02152, - 0.02352, - 0.02499, - 0.02791, - 0.03029, - 0.0331, - 0.03748, - 0.04604, - 0.04977, - 0.06437, + 0.07097, + 0.03388, + 0.01732, + 0.01475, + 0.02226, + 0.01251, + 0.02268, + 0.01214, + 0.01054, + 0.01912, + 0.01041, + 0.01197, + 0.00931, + 0.0097, + 0.01115, + 0.01239, + 0.00938, + 0.00957, + 0.0102, + 0.01046, + 0.01102, + 0.01212, + 0.01427, + 0.01417, + 0.02157, + 0.01176, + 0.01213, + 0.01252, + 0.01292, + 0.01341, + 0.01384, + 0.01442, + 0.01496, + 0.01559, + 0.01629, + 0.01706, + 0.01772, + 0.0188, + 0.01986, + 0.02107, + 0.02212, + 0.024, + 0.02537, + 0.02806, + 0.03024, + 0.03281, + 0.0368, + 0.04463, + 0.04808, + 0.06171, ], } ), - "pagerank_h3", - skip_relational=True, - skip_sql=True, + "pagerank_h8", order_sensitive=True, - args=[3], + args=[8], ), - id="pagerank_h3", + id="pagerank_h8", ), ], ) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt new file mode 100644 index 000000000..045977af7 --- /dev/null +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -0,0 +1,65 @@ +ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) + 
PROJECT(columns={'page_rank_1': ROUND(page_rank_0_2414, 5:numeric), 's_key': s_key}) + FILTER(condition=dummy_link_2412, columns={'page_rank_0_2414': page_rank_0_2414, 's_key': s_key}) + PROJECT(columns={'dummy_link_2412': dummy_link_2412, 'page_rank_0_2414': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2413 * page_rank_0_2404 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t0.consider_link_2413, 'dummy_link_2412': t0.dummy_link_2412, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t1.consider_link_2413, 'dummy_link_2412': t1.dummy_link_2412, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404}) + FILTER(condition=dummy_link_2402, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2402': dummy_link_2402, 'n_out': n_out, 'page_rank_0_2404': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2403 * page_rank_0_2394 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t0.consider_link_2403, 'dummy_link_2402': t0.dummy_link_2402, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t1.consider_link_2403, 'dummy_link_2402': t1.dummy_link_2402, 'l_target': t1.l_target, 'n_out': t0.n_out, 
'page_rank_0_2394': t0.page_rank_0_2394}) + FILTER(condition=dummy_link_2392, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2392': dummy_link_2392, 'n_out': n_out, 'page_rank_0_2394': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2393 * page_rank_0_2384 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t0.consider_link_2393, 'dummy_link_2392': t0.dummy_link_2392, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t1.consider_link_2393, 'dummy_link_2392': t1.dummy_link_2392, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384}) + FILTER(condition=dummy_link_2382, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2382': dummy_link_2382, 'n_out': n_out, 'page_rank_0_2384': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2383 * page_rank_0_2374 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t0.consider_link_2383, 'dummy_link_2382': t0.dummy_link_2382, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t1.consider_link_2383, 'dummy_link_2382': t1.dummy_link_2382, 
'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374}) + FILTER(condition=dummy_link_2372, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2374': page_rank_0_2374, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2372': dummy_link_2372, 'n_out': n_out, 'page_rank_0_2374': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2373 * page_rank_0_2364 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': t0.consider_link_2373, 'dummy_link_2372': t0.dummy_link_2372, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': t1.consider_link_2373, 'dummy_link_2372': t1.dummy_link_2372, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364}) + FILTER(condition=dummy_link_2362, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2362': dummy_link_2362, 'n_out': n_out, 'page_rank_0_2364': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2363 * page_rank_0_2354 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t0.consider_link_2363, 'dummy_link_2362': t0.dummy_link_2362, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t1.consider_link_2363, 
'dummy_link_2362': t1.dummy_link_2362, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354}) + FILTER(condition=dummy_link_2352, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link_2352': dummy_link_2352, 'n_out': n_out, 'page_rank_0_2354': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2353 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2353': t0.consider_link_2353, 'dummy_link_2352': t0.dummy_link_2352, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2353': t1.consider_link_2353, 'dummy_link_2352': t1.dummy_link_2352, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': 
t0.n_out, 'page_rank': t0.page_rank}) + PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2353': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2352': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2363': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2362': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2373': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2372': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': 
l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2383': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2382': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2393': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2392': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2403': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2402': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) + PROJECT(columns={'consider_link_2413': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link_2412': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_sql_refsols/pagerank_h8_sqlite.sql b/tests/test_sql_refsols/pagerank_h8_sqlite.sql new file mode 100644 index 000000000..017bc5921 --- /dev/null +++ b/tests/test_sql_refsols/pagerank_h8_sqlite.sql @@ -0,0 +1,193 @@ +WITH _t21 AS ( + SELECT + s_key + FROM main.sites +), _s0 AS ( + SELECT + COUNT(*) OVER () AS n, + s_key + FROM _t21 +), _s1 AS ( + SELECT + l_source, + l_target + FROM main.links +), _s2 AS ( + SELECT + COALESCE( + SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), + 0 + ) 
AS n_out, + CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, + MAX(_s0.n) AS anything_n, + MAX(_s0.s_key) AS anything_s_key + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.s_key = _s1.l_source + GROUP BY + _s0.s_key +), _t16 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _s2.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s2.page_rank + ) AS REAL) / _s2.n_out + ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, + _s2.anything_n, + NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link, + _s2.n_out, + _s5.s_key + FROM _s2 AS _s2 + JOIN _s1 AS _t22 + ON _s2.anything_s_key = _t22.l_source + JOIN _t21 AS _s5 + ON _s5.s_key = _t22.l_target OR _t22.l_target IS NULL +), _t14 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t16.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t16.page_rank_0 + ) AS REAL) / _t16.n_out + ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_2354, + _t16.anything_n, + NOT _t23.l_target IS NULL AND _t23.l_source = _t23.l_target AS dummy_link_2352, + _t16.n_out, + _s9.s_key + FROM _t16 AS _t16 + JOIN _s1 AS _t23 + ON _t16.s_key = _t23.l_source + JOIN _t21 AS _s9 + ON _s9.s_key = _t23.l_target OR _t23.l_target IS NULL + WHERE + _t16.dummy_link +), _t12 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t14.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t24.l_source <> _t24.l_target OR _t24.l_target IS NULL AS INTEGER) * _t14.page_rank_0_2354 + ) AS REAL) / _t14.n_out + ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_2364, + _t14.anything_n, + NOT _t24.l_target IS NULL AND _t24.l_source = _t24.l_target AS dummy_link_2362, + _t14.n_out, + _s13.s_key + FROM _t14 AS _t14 + JOIN _s1 AS _t24 + ON _t14.s_key = _t24.l_source + JOIN _t21 AS _s13 + ON _s13.s_key = _t24.l_target OR _t24.l_target IS NULL + WHERE + _t14.dummy_link_2352 +), _t10 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / 
_t12.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t25.l_source <> _t25.l_target OR _t25.l_target IS NULL AS INTEGER) * _t12.page_rank_0_2364 + ) AS REAL) / _t12.n_out + ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_2374, + _t12.anything_n, + NOT _t25.l_target IS NULL AND _t25.l_source = _t25.l_target AS dummy_link_2372, + _t12.n_out, + _s17.s_key + FROM _t12 AS _t12 + JOIN _s1 AS _t25 + ON _t12.s_key = _t25.l_source + JOIN _t21 AS _s17 + ON _s17.s_key = _t25.l_target OR _t25.l_target IS NULL + WHERE + _t12.dummy_link_2362 +), _t8 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t10.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t26.l_source <> _t26.l_target OR _t26.l_target IS NULL AS INTEGER) * _t10.page_rank_0_2374 + ) AS REAL) / _t10.n_out + ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_2384, + _t10.anything_n, + NOT _t26.l_target IS NULL AND _t26.l_source = _t26.l_target AS dummy_link_2382, + _t10.n_out, + _s21.s_key + FROM _t10 AS _t10 + JOIN _s1 AS _t26 + ON _t10.s_key = _t26.l_source + JOIN _t21 AS _s21 + ON _s21.s_key = _t26.l_target OR _t26.l_target IS NULL + WHERE + _t10.dummy_link_2372 +), _t6 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t8.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _t8.page_rank_0_2384 + ) AS REAL) / _t8.n_out + ) OVER (PARTITION BY _s25.s_key) AS page_rank_0_2394, + _t8.anything_n, + NOT _t27.l_target IS NULL AND _t27.l_source = _t27.l_target AS dummy_link_2392, + _t8.n_out, + _s25.s_key + FROM _t8 AS _t8 + JOIN _s1 AS _t27 + ON _t27.l_source = _t8.s_key + JOIN _t21 AS _s25 + ON _s25.s_key = _t27.l_target OR _t27.l_target IS NULL + WHERE + _t8.dummy_link_2382 +), _t4 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t6.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _t6.page_rank_0_2394 + ) AS REAL) / _t6.n_out + ) OVER (PARTITION BY _s29.s_key) AS page_rank_0_2404, + 
_t6.anything_n, + NOT _t28.l_target IS NULL AND _t28.l_source = _t28.l_target AS dummy_link_2402, + _t6.n_out, + _s29.s_key + FROM _t6 AS _t6 + JOIN _s1 AS _t28 + ON _t28.l_source = _t6.s_key + JOIN _t21 AS _s29 + ON _s29.s_key = _t28.l_target OR _t28.l_target IS NULL + WHERE + _t6.dummy_link_2392 +), _t2 AS ( + SELECT + ( + CAST(0.15000000000000002 AS REAL) / _t4.anything_n + ) + 0.85 * SUM( + CAST(( + CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _t4.page_rank_0_2404 + ) AS REAL) / _t4.n_out + ) OVER (PARTITION BY _s33.s_key) AS page_rank_0_2414, + NOT _t29.l_target IS NULL AND _t29.l_source = _t29.l_target AS dummy_link_2412, + _s33.s_key + FROM _t4 AS _t4 + JOIN _s1 AS _t29 + ON _t29.l_source = _t4.s_key + JOIN _t21 AS _s33 + ON _s33.s_key = _t29.l_target OR _t29.l_target IS NULL + WHERE + _t4.dummy_link_2402 +) +SELECT + s_key AS key, + ROUND(page_rank_0_2414, 5) AS page_rank +FROM _t2 +WHERE + dummy_link_2412 +ORDER BY + s_key From 80a9ca0e6a47c978455bdd5d4e8caaf406076ce8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 22:09:12 -0400 Subject: [PATCH 38/97] Moved around more errors, got rid of redundant ones, and fixed a cross-related bug --- pydough/conversion/relational_converter.py | 3 +- pydough/errors/pydough_error_builder.py | 149 +++++++++++++++++- pydough/evaluation/evaluate_unqualified.py | 28 +--- pydough/pydough_operators/base_operator.py | 11 +- .../expression_operators/binary_operators.py | 3 +- .../expression_operator.py | 11 +- pydough/qdag/README.md | 26 ++- pydough/qdag/collections/calculate.py | 60 +------ pydough/qdag/collections/collection_access.py | 7 +- pydough/qdag/collections/collection_qdag.py | 13 +- pydough/qdag/collections/global_context.py | 8 +- pydough/qdag/collections/order_by.py | 32 +--- pydough/qdag/collections/partition_by.py | 44 +----- pydough/qdag/collections/partition_child.py | 9 ++ pydough/qdag/collections/top_k.py | 5 +- pydough/qdag/collections/where.py | 31 +--- 
.../expressions/child_reference_expression.py | 6 +- pydough/qdag/expressions/column_property.py | 7 +- pydough/qdag/expressions/reference.py | 6 +- pydough/qdag/expressions/sided_reference.py | 6 +- pydough/qdag/node_builder.py | 39 +++-- pydough/unqualified/qualification.py | 35 ++-- pydough/unqualified/unqualified_transform.py | 2 +- tests/test_exploration.py | 2 +- tests/test_pipeline_tpch_custom.py | 10 ++ tests/test_plan_refsols/simple_cross_13.txt | 14 ++ .../exploration_examples.py | 5 +- .../simple_pydough_functions.py | 11 ++ tests/test_qdag_collection.py | 7 +- tests/test_qualification_errors.py | 44 ++++++ tests/testing_utilities.py | 22 +-- 31 files changed, 347 insertions(+), 309 deletions(-) create mode 100644 tests/test_plan_refsols/simple_cross_13.txt diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 956bb3698..cf94c88aa 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1315,8 +1315,7 @@ def preprocess_root( for _, column in output_cols: final_terms.append((column, Reference(node, column))) children: list[PyDoughCollectionQDAG] = [] - final_calc: Calculate = Calculate(node, children).with_terms(final_terms) - return final_calc + return Calculate(node, children, final_terms) def make_relational_ordering( diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index cd6f423e0..33c093c4b 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -7,7 +7,8 @@ from pydough.errors import PyDoughException, PyDoughQDAGException if TYPE_CHECKING: - from pydough.qdag import PyDoughCollectionQDAG + from pydough.pydough_operators import PyDoughOperator + from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG class PyDoughErrorBuilder: @@ -35,3 +36,149 @@ def term_not_found( return PyDoughQDAGException( collection.name_mismatch_error(term_name, atol=2, 
rtol=0.1, min_names=3) ) + + def down_streaming_conflict( + self, collection: "PyDoughCollectionQDAG", term_name: str + ) -> PyDoughException: + """ + Creates an exception for when a term accessed within a collection but + it is unclear whether it is a term of the collection or a term + downstreamed from an ancestor. + + Args: + `collection`: The collection in which the term is being accessed. + `term_name`: The name of the term that caused the ambiguity. + + Returns: + An exception indicating the name access ambiguity. + """ + return PyDoughQDAGException( + f"Unclear whether {term_name!r} refers to a term of the current context or ancestor of collection {collection!r}" + ) + + def cardinality_error( + self, collection: "PyDoughCollectionQDAG", expr: "PyDoughExpressionQDAG" + ) -> PyDoughException: + """ + Creates an exception for when a term is used within a context that + should be singular with regards to the context, but it is plural. + + Args: + `collection`: The collection in which the term is being accessed. + `expr`: The PyDoughQDAG expression + + Returns: + An exception indicating the cardinality error. + """ + raise PyDoughQDAGException( + f"Expected all terms in {collection.standalone_string} to be singular, but encountered a plural expression: {expr}" + ) + + def expected_collection(self, expr: object) -> PyDoughException: + """ + Creates an exception for when a QDAG collection is expected but + something else is found. + """ + from pydough.qdag import PyDoughExpressionQDAG + + if isinstance(expr, PyDoughExpressionQDAG): + return PyDoughQDAGException( + f"Expected a collection, but received an expression: {expr}" + ) + else: + return PyDoughQDAGException( + f"Expected a collection, but received {expr.__class__.__name__}: {expr}" + ) + + def expected_expression(self, expr: object) -> PyDoughException: + """ + Creates an exception for when a QDAG expression is expected but + something else is found. 
+ """ + from pydough.qdag import PyDoughCollectionQDAG + + if isinstance(expr, PyDoughCollectionQDAG): + return PyDoughQDAGException( + f"Expected an expression, but received a collection: {expr}" + ) + else: + return PyDoughQDAGException( + f"Expected an expression, but received {expr.__class__.__name__}: {expr}" + ) + + def type_verification_fail( + self, operator: "PyDoughOperator", args: list[object], message: str + ) -> PyDoughException: + """ + Creates an exception for when type verification fails for an operator. + + Args: + `operator`: The operator that failed type verification. + `args`: The arguments passed to the operator. + `message`: The error message explaining the typing failure. + + Returns: + An exception indicating the type verification failure. + """ + arg_strings: list[str] = [str(arg) for arg in args] + return PyDoughQDAGException( + f"Invalid operator invocation {operator.to_string(arg_strings)!r}: {message}" + ) + + def type_inference_fail( + self, operator: "PyDoughOperator", args: list[object], message: str + ) -> PyDoughException: + """ + Creates an exception for when return type inference fails for an + expression function operator. + + Args: + `operator`: The operator that failed type inference. + `args`: The arguments passed to the operator. + `message`: The error message explaining the inference failure. + + Returns: + An exception indicating the type inference failure. + """ + arg_strings: list[str] = [str(arg) for arg in args] + return PyDoughQDAGException( + f"Unable to infer the return type of operator invocation {operator.to_string(arg_strings)!r}: {message}" + ) + + def bad_columns(self, columns: object) -> PyDoughException: + """ + Creates an exception for when the `columns` to `to_sql` or `to_df` is + not valid. + + Args: + `columns`: The columns argument that caused the error. + + Returns: + An exception indicating the bad `columns` argument.
+ """ + if isinstance(columns, list): + for column in columns: + if not isinstance(column, str): + return PyDoughQDAGException( + f"Expected `columns` argument to be a list of strings, found {column.__class__.__name__}" + ) + return PyDoughQDAGException( + "Expected `columns` argument to be a non-empty list" + ) + elif isinstance(columns, dict): + for alias, column in columns.items(): + if not isinstance(alias, str): + return PyDoughQDAGException( + f"Expected `columns` argument to be a dictionary where the keys are strings, found {alias.__class__.__name__}" + ) + if not isinstance(column, str): + return PyDoughQDAGException( + f"Expected `columns` argument to be a dictionary where the values are strings, found {column.__class__.__name__}" + ) + return PyDoughQDAGException( + "Expected `columns` argument to be a non-empty dictionary" + ) + else: + return PyDoughQDAGException( + f"Expected `columns` argument to be a list or dictionary, found {columns.__class__.__name__}" + ) diff --git a/pydough/evaluation/evaluate_unqualified.py b/pydough/evaluation/evaluate_unqualified.py index cca3101c8..d7693941b 100644 --- a/pydough/evaluation/evaluate_unqualified.py +++ b/pydough/evaluation/evaluate_unqualified.py @@ -13,7 +13,6 @@ from pydough.conversion import convert_ast_to_relational from pydough.database_connectors import DatabaseContext from pydough.errors import ( - PyDoughQDAGException, PyDoughSessionException, ) from pydough.metadata import GraphMetadata @@ -84,25 +83,18 @@ def _load_column_selection(kwargs: dict[str, object]) -> list[tuple[str, str]] | return None elif isinstance(columns_arg, list): for column in columns_arg: - assert isinstance(column, str), ( - f"Expected column name in `columns` argument to be a string, found {column.__class__.__name__}" - ) + if not isinstance(column, str): + raise pydough.active_session.error_builder.bad_columns(columns_arg) result.append((column, column)) elif isinstance(columns_arg, dict): for alias, column in 
columns_arg.items(): - assert isinstance(alias, str), ( - f"Expected alias name in `columns` argument to be a string, found {column.__class__.__name__}" - ) - assert isinstance(column, str), ( - f"Expected column name in `columns` argument to be a string, found {column.__class__.__name__}" - ) + if not (isinstance(alias, str) and isinstance(column, str)): + raise pydough.active_session.error_builder.bad_columns(columns_arg) result.append((alias, column)) else: - raise PyDoughQDAGException( - f"Expected `columns` argument to be a list or dictionary, found {columns_arg.__class__.__name__}" - ) + raise pydough.active_session.error_builder.bad_columns(columns_arg) if len(result) == 0: - raise PyDoughQDAGException("Column selection must not be empty") + raise pydough.active_session.error_builder.bad_columns(columns_arg) return result @@ -128,9 +120,7 @@ def to_sql(node: UnqualifiedNode, **kwargs) -> str: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise PyDoughQDAGException( - f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" - ) + raise pydough.active_session.error_builder.expected_collection(qualified) relational: RelationalRoot = convert_ast_to_relational( qualified, column_selection, config, database.dialect ) @@ -161,9 +151,7 @@ def to_df(node: UnqualifiedNode, **kwargs) -> pd.DataFrame: graph, config, database = _load_session_info(**kwargs) qualified: PyDoughQDAG = qualify_node(node, graph, config) if not isinstance(qualified, PyDoughCollectionQDAG): - raise PyDoughQDAGException( - f"Final qualified expression must be a collection, found {qualified.__class__.__name__}" - ) + raise pydough.active_session.error_builder.expected_collection(qualified) relational: RelationalRoot = convert_ast_to_relational( qualified, column_selection, config, database.dialect ) diff --git
a/pydough/pydough_operators/base_operator.py b/pydough/pydough_operators/base_operator.py index a7750c1a3..1983eb406 100644 --- a/pydough/pydough_operators/base_operator.py +++ b/pydough/pydough_operators/base_operator.py @@ -61,12 +61,11 @@ def verify_allows_args(self, args: list[Any]) -> None: try: self.verifier.accepts(args) except PyDoughQDAGException as e: - # If the verifier failed, raise the error with the same traceback - # but prepend it with information about the operator and args - # that caused the failure. - arg_strings: list[str] = [str(arg) for arg in args] - msg = f"Invalid operator invocation {self.to_string(arg_strings)!r}: {e}" - raise PyDoughQDAGException(msg).with_traceback(e.__traceback__) + import pydough + + raise pydough.active_session.error_builder.type_verification_fail( + self, args, str(e) + ) @abstractmethod def to_string(self, arg_strings: list[str]) -> str: diff --git a/pydough/pydough_operators/expression_operators/binary_operators.py b/pydough/pydough_operators/expression_operators/binary_operators.py index 2fb74135b..eeed8319b 100644 --- a/pydough/pydough_operators/expression_operators/binary_operators.py +++ b/pydough/pydough_operators/expression_operators/binary_operators.py @@ -6,7 +6,6 @@ from enum import Enum -from pydough.errors import PyDoughQDAGException from pydough.pydough_operators.type_inference import ( ExpressionTypeDeducer, TypeVerifier, @@ -44,7 +43,7 @@ def from_string(s: str) -> "BinOp": for op in BinOp.__members__.values(): if s == op.value: return op - raise PyDoughQDAGException(f"Unrecognized operation: {s!r}") + raise ValueError(f"Unrecognized operation: {s!r}") BinOp.__members__.items() diff --git a/pydough/pydough_operators/expression_operators/expression_operator.py b/pydough/pydough_operators/expression_operators/expression_operator.py index 8e437dae0..7b801342f 100644 --- a/pydough/pydough_operators/expression_operators/expression_operator.py +++ 
b/pydough/pydough_operators/expression_operators/expression_operator.py @@ -7,6 +7,7 @@ from abc import abstractmethod from typing import Any +from pydough.errors import PyDoughQDAGException from pydough.pydough_operators.base_operator import PyDoughOperator from pydough.pydough_operators.type_inference import ( ExpressionTypeDeducer, @@ -97,4 +98,12 @@ def infer_return_type(self, args: list[Any]) -> PyDoughType: Raises: `PyDoughQDAGException` if `args` is invalid for this operator. """ - return self.deducer.infer_return_type(args) + + try: + return self.deducer.infer_return_type(args) + except PyDoughQDAGException as e: + import pydough + + raise pydough.active_session.error_builder.type_inference_fail( + self, args, str(e) + ) diff --git a/pydough/qdag/README.md b/pydough/qdag/README.md index 4284e4b1a..8b4fd054b 100644 --- a/pydough/qdag/README.md +++ b/pydough/qdag/README.md @@ -79,8 +79,7 @@ child_reference_node = builder.build_child_reference_expression([child_collectio # Build a CALCULATE node # Equivalent PyDough code: `TPCH.Nations.CALCULATE(region_name=region.name)` -calculate_node = builder.build_calc(table_collection, [child_collection]) -calculate_node = calculate_node.with_terms([("region_name", child_reference_node)]) +calculate_node = builder.build_calc(table_collection, [child_collection], [("region_name", child_reference_node)]) # Build a WHERE node # Equivalent PyDough code: `TPCH.Nations.WHERE(region.name == "ASIA")` @@ -88,8 +87,7 @@ condition = builder.build_expression_function_call( "EQU", [child_reference_node, builder.build_literal("ASIA", StringType())] ) -where_node = builder.build_where(table_collection, [child_collection]) -where_node = where_node.with_condition(condition) +where_node = builder.build_where(table_collection, [child_collection], condition) # Build a SINGULAR node # Equivalent PyDough code: `Regions.CALCULATE(n_4_nation=nations.WHERE(key == 4).SINGULAR().name)` @@ -103,15 +101,13 @@ key_ref = 
builder.build_reference(nations_sub_collection, "key") literal_4 = builder.build_literal(4, NumericType()) condition = builder.build_expression_function_call("EQU", [key_ref, literal_4]) # Build WHERE node with condition -where_node = builder.build_where(nations_sub_collection, []) -where_node = where_node.with_condition(condition) +where_node = builder.build_where(nations_sub_collection, [], condition) # Create SINGULAR node from filtered result singular_node = builder.build_singular(where_node) # Build reference node for name reference_node = builder.build_reference(singular_node, "name") # Build CALCULATE node with calculated term -calculate_node = builder.build_calc(regions_collection, [nations_sub_collection]) -calculate_node = calculate_node.with_terms([("n_4_nation", reference_node)]) +calculate_node = builder.build_calc(regions_collection, [nations_sub_collection], [("n_4_nation", reference_node)]) # Build an ORDER BY node @@ -119,20 +115,17 @@ calculate_node = calculate_node.with_terms([("n_4_nation", reference_node)]) collation_expression = builder.build_collation_expression( reference_node, True, False ) -order_by_node = builder.build_order(table_collection, []) -order_by_node = order_by_node.with_collation([collation_expression]) +order_by_node = builder.build_order(table_collection, [], [collation_expression]) # Build a TOP K node # Equivalent PyDough code: `TPCH.Nations.TOP_K(5, by=name.ASC(na_pos='first'))` -top_k_node = builder.build_top_k(table_collection, [], 5) -top_k_node = top_k_node.with_collation([collation_expression]) +top_k_node = builder.build_top_k(table_collection, [], 5, [collation_expression]) # Build a PARTITION BY node # Equivalent PyDough code: `TPCH.PARTITION(Parts, name="p", by=part_type)` part_collection = builder.build_child_access("Parts", global_context_node) partition_key = builder.build_reference(part_collection, "part_type") -partition_by_node = builder.build_partition(part_collection, child_collection, "p")
-partition_by_node = partition_by_node.with_keys([partition_key]) +partition_by_node = builder.build_partition(part_collection, child_collection, "p", [partition_key]) # Build a child reference collection node # Equivalent PyDough code: `Nations.CALCULATE(n_customers=COUNT(customers))` @@ -146,8 +139,7 @@ count_call = builder.build_expression_function_call( "COUNT", [child_reference_collection_node] ) -calculate_node = builder.build_calc(table_collection, [customers_child]) -calculate_node = calculate_node.with_terms([("n_customers", count_call)]) +calculate_node = builder.build_calc(table_collection, [customers_child], [("n_customers", count_call)]) # Build a window function call node # Equivalent PyDough code: `RANKING(by=TPCH.Nations.name, levels=1, allow_ties=True)` @@ -157,7 +149,7 @@ window_call_node = builder.build_window_call(RANKING, [reference_node], 1, {"all ### HAS/HASNOT Rewrite -The `has_hasnot_rewrite` function is used to transform `HAS` and `HASNOT` expressions in the QDAG. It is used in the `with_terms`, `with_condition`, and `with_collation` calls of the various child operator classes to rewrite all `HAS(x)` into `COUNT(X) > 0` and all `HASNOT(X)` into `COUNT(X) == 0` unless they are in the conjunction of a `WHERE` clause. +The `has_hasnot_rewrite` function is used to transform `HAS` and `HASNOT` expressions in the QDAG. It is used in constructors of the various child operator classes to rewrite all `HAS(x)` into `COUNT(X) > 0` and all `HASNOT(X)` into `COUNT(X) == 0` unless they are in the conjunction of a `WHERE` clause. Below are some examples of PyDough snippets that are/aren't affected by the rewrite. 
diff --git a/pydough/qdag/collections/calculate.py b/pydough/qdag/collections/calculate.py index b4d48664b..585b8c476 100644 --- a/pydough/qdag/collections/calculate.py +++ b/pydough/qdag/collections/calculate.py @@ -12,9 +12,7 @@ from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( - BackReferenceExpression, PyDoughExpressionQDAG, - Reference, ) from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite @@ -31,63 +29,20 @@ def __init__( self, predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + terms: list[tuple[str, PyDoughExpressionQDAG]], ): super().__init__(predecessor, children) - # Not initialized until with_terms is called - self._calc_term_indices: dict[str, int] | None = None - self._calc_term_values: dict[str, PyDoughExpressionQDAG] | None = None + self._calc_term_indices: dict[str, int] = {} + self._calc_term_values: dict[str, PyDoughExpressionQDAG] = {} self._all_term_names: set[str] = set() self._ancestral_mapping: dict[str, int] = dict( predecessor.ancestral_mapping.items() ) self._calc_terms: set[str] = set() - def with_terms(self, terms: list[tuple[str, PyDoughExpressionQDAG]]) -> "Calculate": - """ - Specifies the terms that are calculated inside of a CALCULATE node, - returning the mutated CALCULATE node afterwards. This is called after - the CALCULATE node is created so that the terms can be expressions that - reference child nodes of the CALCULATE. However, this must be called - on the CALCULATE node before any properties are accessed by - `calc_terms`, `all_terms`, `to_string`, etc. - - Args: - `terms`: the list of terms calculated in the CALCULATE node as a - list of tuples in the form `(name, expression)`. Each `expression` - can contain `ChildReferenceExpression` instances that refer to a - property of one of the children of the CALCULATE node. 
- - Returns: - The mutated CALCULATE node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the terms have already been added to the - CALCULATE node. - """ - if self._calc_term_indices is not None: - raise PyDoughQDAGException( - "Cannot call `with_terms` on a CALCULATE node more than once" - ) # Include terms from the predecessor, with the terms from this # CALCULATE added in. - self._calc_term_indices = {} - self._calc_term_values = {} for idx, (name, value) in enumerate(terms): - ancestral_idx: int = self.ancestral_mapping.get(name, 0) - if ancestral_idx > 0: - # Ignore no-op back-references, e.g.: - # region(region_name=name).customers(region_name=region_name) - if not ( - ( - isinstance(value, BackReferenceExpression) - and value.back_levels == ancestral_idx - and value.term_name == name - ) - or isinstance(value, Reference) - ): - raise PyDoughQDAGException( - f"Cannot redefine term {name!r} in CALCULATE that is already defined in an ancestor" - ) self._calc_term_indices[name] = idx self._calc_term_values[name] = has_hasnot_rewrite(value, False) self._all_term_names.add(name) @@ -95,7 +50,6 @@ def with_terms(self, terms: list[tuple[str, PyDoughExpressionQDAG]]) -> "Calcula self.ancestral_mapping[name] = 0 self.all_terms.update(self.preceding_context.all_terms) self.verify_singular_terms(self._calc_term_values.values()) - return self @property def calc_term_indices( @@ -105,10 +59,6 @@ def calc_term_indices( Mapping of each named expression of the CALCULATE to the index of the ordinal position of the property when included in a CALCULATE. """ - if self._calc_term_indices is None: - raise PyDoughQDAGException( - "Cannot access `calc_term_indices` of a CALCULATE node before adding calc terms with `with_terms`" - ) return self._calc_term_indices @property @@ -119,10 +69,6 @@ def calc_term_values( Mapping of each named expression of the CALCULATE to the QDAG node for that expression. 
""" - if self._calc_term_values is None: - raise PyDoughQDAGException( - "Cannot access `_calc_term_values` of a CALCULATE node before adding calc terms with `with_terms`" - ) return self._calc_term_values @property diff --git a/pydough/qdag/collections/collection_access.py b/pydough/qdag/collections/collection_access.py index 75f167e6e..2fae30d5b 100644 --- a/pydough/qdag/collections/collection_access.py +++ b/pydough/qdag/collections/collection_access.py @@ -8,6 +8,7 @@ from functools import cache +import pydough from pydough.errors import PyDoughQDAGException from pydough.metadata import ( CollectionMetadata, @@ -115,8 +116,8 @@ def get_term(self, term_name: str) -> PyDoughQDAG: # Verify that the ancestor name is not also a name in the current # context. if term_name in self.calc_terms: - raise PyDoughQDAGException( - f"Cannot have term name {term_name!r} used in an ancestor of collection {self!r}" + raise pydough.active_session.error_builder.down_streaming_conflict( + collection=self, term_name=term_name ) # Create a back-reference to the ancestor term. 
return BackReferenceExpression( @@ -148,7 +149,7 @@ def get_term_from_property(self, term_name: str) -> PyDoughQDAG: elif isinstance(property, TableColumnMetadata): return ColumnProperty(property) else: - raise PyDoughQDAGException( + raise NotImplementedError( f"Unsupported property type for collection access: {property.__class__.name}" ) diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index d7a7e9b3d..a68ba1468 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -13,7 +13,6 @@ import numpy as np import pydough -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions.collation_expression import CollationExpression from pydough.qdag.expressions.expression_qdag import PyDoughExpressionQDAG @@ -172,8 +171,8 @@ def verify_singular_terms(self, exprs: Iterable[PyDoughExpressionQDAG]) -> None: relative_context: PyDoughCollectionQDAG = self.starting_predecessor for expr in exprs: if not expr.is_singular(relative_context): - raise PyDoughQDAGException( - f"Expected all terms in {self.standalone_string} to be singular, but encountered a plural expression: {expr.to_string()}" + raise pydough.active_session.error_builder.cardinality_error( + collection=self, expr=expr ) @abstractmethod @@ -224,9 +223,7 @@ def get_expr(self, term_name: str) -> PyDoughExpressionQDAG: """ term = self.get_term(term_name) if not isinstance(term, PyDoughExpressionQDAG): - raise PyDoughQDAGException( - f"Property {term_name!r} of {self} is not an expression" - ) + raise pydough.active_session.error_builder.expected_expression(term) return term def get_collection(self, term_name: str) -> "PyDoughCollectionQDAG": @@ -243,9 +240,7 @@ def get_collection(self, term_name: str) -> "PyDoughCollectionQDAG": """ term = self.get_term(term_name) if not isinstance(term, PyDoughCollectionQDAG): - raise 
PyDoughQDAGException( - f"Property {term_name!r} of {self} is not a collection" - ) + raise pydough.active_session.error_builder.expected_collection(term) return term @property diff --git a/pydough/qdag/collections/global_context.py b/pydough/qdag/collections/global_context.py index af034810c..38f56154d 100644 --- a/pydough/qdag/collections/global_context.py +++ b/pydough/qdag/collections/global_context.py @@ -109,10 +109,14 @@ def unique_terms(self) -> list[str]: return [] def is_singular(self, context: PyDoughCollectionQDAG) -> bool: - raise PyDoughQDAGException(f"Cannot call is_singular on {self!r}") + return ( + self.ancestor_context is None + or self.ancestor_context.starting_predecessor == context + or self.ancestor_context.is_singular(context) + ) def get_expression_position(self, expr_name: str) -> int: - raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}") + raise NotImplementedError(f"Cannot call get_expression_position on {self!r}") def get_term(self, term_name: str) -> PyDoughQDAG: self.verify_term_exists(term_name) diff --git a/pydough/qdag/collections/order_by.py b/pydough/qdag/collections/order_by.py index 0322f5310..54e730a08 100644 --- a/pydough/qdag/collections/order_by.py +++ b/pydough/qdag/collections/order_by.py @@ -8,7 +8,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import CollationExpression from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite @@ -25,33 +24,9 @@ def __init__( self, predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + collation: list[CollationExpression], ): super().__init__(predecessor, children) - self._collation: list[CollationExpression] | None = None - - def with_collation(self, collation: list[CollationExpression]) -> "OrderBy": - """ - Specifies the expressions that are used to do the ordering in an - ORDERBY node returning the mutated ORDERBY node afterwards. 
This is - called after the ORDERBY node is created so that the terms can be - expressions that reference child nodes of the ORDERBY. However, this - must be called on the ORDERBY node before any properties are accessed - by `calc_terms`, `all_terms`, `to_string`, etc. - - Args: - `collation`: the list of collation nodes to order by. - - Returns: - The mutated ORDERBY node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the condition has already been added to - the WHERE node. - """ - if self._collation is not None: - raise PyDoughQDAGException( - "Cannot call `with_collation` more than once per ORDERBY node" - ) self._collation = [ CollationExpression( has_hasnot_rewrite(col.expr, False), col.asc, col.na_last @@ -59,17 +34,12 @@ def with_collation(self, collation: list[CollationExpression]) -> "OrderBy": for col in collation ] self.verify_singular_terms(self._collation) - return self @property def collation(self) -> list[CollationExpression]: """ The ordering keys for the ORDERBY clause. 
""" - if self._collation is None: - raise PyDoughQDAGException( - "Cannot access `collation` of an ORDERBY node before calling `with_collation`" - ) return self._collation @property diff --git a/pydough/qdag/collections/partition_by.py b/pydough/qdag/collections/partition_by.py index 0d315f213..f6d70f627 100644 --- a/pydough/qdag/collections/partition_by.py +++ b/pydough/qdag/collections/partition_by.py @@ -9,7 +9,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions import ( BackReferenceExpression, @@ -34,49 +33,28 @@ def __init__( ancestor: PyDoughCollectionQDAG, child: PyDoughCollectionQDAG, name: str, + keys: list[ChildReferenceExpression], ): super().__init__([child]) self._ancestor_context: PyDoughCollectionQDAG = ancestor self._child: PyDoughCollectionQDAG = child self._name: str = name - self._keys: list[PartitionKey] | None = None self._key_name_indices: dict[str, int] = {} self._ancestral_mapping: dict[str, int] = { name: level + 1 for name, level in ancestor.ancestral_mapping.items() } self._calc_terms: set[str] = set() self._all_terms: set[str] = set(self.ancestral_mapping) | {self.child.name} - - @property - def name(self) -> str: - return self._name - - def with_keys(self, keys: list[ChildReferenceExpression]) -> "PartitionBy": - """ - Specifies the references to the keys that should be used to partition - the child node. - - Args: - `keys`: the list of references to the keys to partition on. - - Returns: - The mutated PARTITION BY node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the keys have already been added to - the PARTITION BY node. 
- """ - if self._keys is not None: - raise PyDoughQDAGException( - "Cannot call `with_keys` more than once per PARTITION BY node" - ) self._keys = [PartitionKey(self, key) for key in keys] for idx, ref in enumerate(keys): self._key_name_indices[ref.term_name] = idx self._calc_terms.add(ref.term_name) self.all_terms.update(self._calc_terms) self.verify_singular_terms(self._keys) - return self + + @property + def name(self) -> str: + return self._name @property def ancestor_context(self) -> PyDoughCollectionQDAG: @@ -91,10 +69,6 @@ def keys(self) -> list[PartitionKey]: """ The partitioning keys for the PARTITION BY clause. """ - if self._keys is None: - raise PyDoughQDAGException( - "Cannot access `keys` of an PARTITION BY node before calling `with_keys`" - ) return self._keys @property @@ -103,10 +77,6 @@ def key_name_indices(self) -> dict[str, int]: The names of the partitioning keys for the PARTITION BY clause and the index they have in a CALCULATE. """ - if self._keys is None: - raise PyDoughQDAGException( - "Cannot access `keys` of an PARTITION BY node before calling `with_keys`" - ) return self._key_name_indices @property @@ -199,8 +169,4 @@ def to_tree_form(self, is_last: bool) -> CollectionTreeForm: return tree_form def equals(self, other: object) -> bool: - if self._keys is None: - raise PyDoughQDAGException( - "Cannot invoke `equals` before calling `with_keys`" - ) return isinstance(other, PartitionBy) and self._keys == other._keys diff --git a/pydough/qdag/collections/partition_child.py b/pydough/qdag/collections/partition_child.py index e10222ebf..9a461c1c6 100644 --- a/pydough/qdag/collections/partition_child.py +++ b/pydough/qdag/collections/partition_child.py @@ -8,6 +8,7 @@ from functools import cache +import pydough from pydough.qdag.expressions import ( BackReferenceExpression, CollationExpression, @@ -90,7 +91,15 @@ def inherited_downstreamed_terms(self) -> set[str]: @cache def get_term(self, term_name: str): self.verify_term_exists(term_name) + # 
Special handling of terms down-streamed from an ancestor of the + # partition child. if term_name in self.ancestral_mapping: + # Verify that the ancestor name is not also a name in the current + # context. + if term_name in self.calc_terms: + raise pydough.active_session.error_builder.down_streaming_conflict( + collection=self, term_name=term_name + ) return BackReferenceExpression( self, term_name, self.ancestral_mapping[term_name] ) diff --git a/pydough/qdag/collections/top_k.py b/pydough/qdag/collections/top_k.py index 5b8b2219f..eda830644 100644 --- a/pydough/qdag/collections/top_k.py +++ b/pydough/qdag/collections/top_k.py @@ -9,6 +9,8 @@ from functools import cache +from pydough.qdag.expressions.collation_expression import CollationExpression + from .collection_qdag import PyDoughCollectionQDAG from .order_by import OrderBy @@ -23,9 +25,10 @@ def __init__( predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], records_to_keep: int, + collation: list[CollationExpression], ): - super().__init__(predecessor, children) self._records_to_keep = records_to_keep + super().__init__(predecessor, children, collation) @property def records_to_keep(self) -> int: diff --git a/pydough/qdag/collections/where.py b/pydough/qdag/collections/where.py index 85d81bb69..fcb8e6d6e 100644 --- a/pydough/qdag/collections/where.py +++ b/pydough/qdag/collections/where.py @@ -8,7 +8,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.expressions import PyDoughExpressionQDAG from pydough.qdag.has_hasnot_rewrite import has_hasnot_rewrite @@ -25,45 +24,17 @@ def __init__( self, predecessor: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + condition: PyDoughExpressionQDAG, ): super().__init__(predecessor, children) - self._condition: PyDoughExpressionQDAG | None = None - - def with_condition(self, condition: PyDoughExpressionQDAG) -> "Where": - """ - Specifies the condition that should be used by the WHERE 
node. This is - called after the WHERE node is created so that the condition can be an - expressions that reference child nodes of the WHERE. However, this must - be called on the WHERE node before any properties are accessed by - `to_string`, `equals`, etc. - - Args: - `condition`: the expression used to filter. - - Returns: - The mutated WHERE node (which has also been modified in-place). - - Raises: - `PyDoughQDAGException` if the condition has already been added to - the WHERE node. - """ - if self._condition is not None: - raise PyDoughQDAGException( - "Cannot call `with_condition` more than once per Where node" - ) self._condition = has_hasnot_rewrite(condition, True) self.verify_singular_terms([self._condition]) - return self @property def condition(self) -> PyDoughExpressionQDAG: """ The predicate expression for the WHERE clause. """ - if self._condition is None: - raise PyDoughQDAGException( - "Cannot access `condition` of a WHERE node before adding the predicate with `with_condition`" - ) return self._condition @property diff --git a/pydough/qdag/expressions/child_reference_expression.py b/pydough/qdag/expressions/child_reference_expression.py index aeaa9e0de..72294f312 100644 --- a/pydough/qdag/expressions/child_reference_expression.py +++ b/pydough/qdag/expressions/child_reference_expression.py @@ -8,7 +8,6 @@ from functools import cache -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG @@ -29,10 +28,7 @@ def __init__( self._child_idx: int = child_idx self._term_name: str = term_name self._expression: PyDoughExpressionQDAG = self._collection.get_expr(term_name) - if not self.expression.is_singular(collection.starting_predecessor): - raise PyDoughQDAGException( - f"Cannot reference plural expression {self.expression} from {self.collection}" - ) + collection.verify_singular_terms([self.expression]) @property def 
child_idx(self) -> int: diff --git a/pydough/qdag/expressions/column_property.py b/pydough/qdag/expressions/column_property.py index 5756f7f13..d44e2ab38 100644 --- a/pydough/qdag/expressions/column_property.py +++ b/pydough/qdag/expressions/column_property.py @@ -5,7 +5,7 @@ __all__ = ["ColumnProperty"] -from pydough.errors import PyDoughQDAGException +from pydough.metadata.collections import SimpleTableMetadata from pydough.metadata.properties import TableColumnMetadata from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.types import PyDoughType @@ -46,10 +46,7 @@ def requires_enclosing_parens(self, parent: PyDoughExpressionQDAG) -> bool: return False def to_string(self, tree_form: bool = False) -> str: - if not hasattr(self.column_property.collection, "table_path"): - raise PyDoughQDAGException( - f"collection of {self.column_property.error_name} does not have a 'table_path' field" - ) + assert isinstance(self.column_property.collection, SimpleTableMetadata) table_path: str = self.column_property.collection.table_path column_name: str = self.column_property.column_name return f"Column[{table_path}.{column_name}]" diff --git a/pydough/qdag/expressions/reference.py b/pydough/qdag/expressions/reference.py index ebbb5c468..47b0d39cc 100644 --- a/pydough/qdag/expressions/reference.py +++ b/pydough/qdag/expressions/reference.py @@ -6,7 +6,6 @@ __all__ = ["Reference"] -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG from pydough.types import PyDoughType @@ -24,10 +23,7 @@ def __init__(self, collection: PyDoughCollectionQDAG, term_name: str): self._collection: PyDoughCollectionQDAG = collection self._term_name: str = term_name self._expression: PyDoughExpressionQDAG = collection.get_expr(term_name) - if not self.expression.is_singular(collection.starting_predecessor): - raise PyDoughQDAGException( - f"Cannot reference 
plural expression {self.expression} from {self.collection}" - ) + collection.verify_singular_terms([self._expression]) @property def collection(self) -> PyDoughCollectionQDAG: diff --git a/pydough/qdag/expressions/sided_reference.py b/pydough/qdag/expressions/sided_reference.py index c8b150fcb..a53a75587 100644 --- a/pydough/qdag/expressions/sided_reference.py +++ b/pydough/qdag/expressions/sided_reference.py @@ -6,7 +6,6 @@ __all__ = ["SidedReference"] -from pydough.errors import PyDoughQDAGException from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.collections.collection_qdag import PyDoughCollectionQDAG from pydough.types import PyDoughType @@ -34,10 +33,7 @@ def __init__( else: base_collection = collection.starting_predecessor self._expression: PyDoughExpressionQDAG = base_collection.get_expr(term_name) - if not self.expression.is_singular(collection.starting_predecessor): - raise PyDoughQDAGException( - f"Cannot reference plural expression {self.expression} from {self.collection}" - ) + collection.starting_predecessor.verify_singular_terms([self.expression]) self._is_parent: bool = is_parent @property diff --git a/pydough/qdag/node_builder.py b/pydough/qdag/node_builder.py index d20b26ad2..14eed78e1 100644 --- a/pydough/qdag/node_builder.py +++ b/pydough/qdag/node_builder.py @@ -257,95 +257,100 @@ def build_calculate( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + terms: list[tuple[str, PyDoughExpressionQDAG]], ) -> Calculate: """ - Creates a CALCULATE instance, but `with_terms` still needs to be called on - the output. + Creates a CALCULATE instance. Args: `preceding_context`: the preceding collection. - `children`: the child collections accessed by the CALCULATE term. + `children`: the child collections accessed by the CALCULATE clause. + `terms`: the terms to be defined in the CALCULATE. Returns: - The newly created PyDough CALCULATE term. + The newly created PyDough CALCULATE clause. 
""" - return Calculate(preceding_context, children) + return Calculate(preceding_context, children, terms) def build_where( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + condition: PyDoughExpressionQDAG, ) -> Where: """ - Creates a WHERE instance, but `with_condition` still needs to be called on - the output. + Creates a WHERE instance. Args: `preceding_context`: the preceding collection. `children`: the child collections accessed by the WHERE term. + `condition`: the condition to be applied in the WHERE clause. Returns: The newly created PyDough WHERE instance. """ - return Where(preceding_context, children) + return Where(preceding_context, children, condition) def build_order( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], + collation: list[CollationExpression], ) -> OrderBy: """ - Creates a ORDERBY instance, but `with_collation` still needs to be called on - the output. + Creates a ORDERBY instance. Args: `preceding_context`: the preceding collection. `children`: the child collections accessed by the ORDERBY term. + `collation`: the collation expressions to be used in the ORDERBY. Returns: The newly created PyDough ORDERBY instance. """ - return OrderBy(preceding_context, children) + return OrderBy(preceding_context, children, collation) def build_top_k( self, preceding_context: PyDoughCollectionQDAG, children: list[PyDoughCollectionQDAG], records_to_keep: int, + collation: list[CollationExpression], ) -> TopK: """ - Creates a TOP K instance, but `with_collation` still needs to be called on - the output. + Creates a TOP K instance. Args: `preceding_context`: the preceding collection. `children`: the child collections accessed by the ORDERBY term. `records_to_keep`: the `K` value in the TOP K. + `collation`: the collation expressions to be used in the TOP K. Returns: The newly created PyDough TOP K instance. 
""" - return TopK(preceding_context, children, records_to_keep) + return TopK(preceding_context, children, records_to_keep, collation) def build_partition( self, preceding_context: PyDoughCollectionQDAG, child: PyDoughCollectionQDAG, name: str, + keys: list[ChildReferenceExpression], ) -> PartitionBy: """ - Creates a PARTITION BY instance, but `with_keys` still needs to be called on - the output. + Creates a PARTITION BY instance. Args: `preceding_context`: the preceding collection. `child`: the child that is the input to the PARTITION BY term. `name`: the name that is used to refer to the partitioned data. + `keys`: the partitioning keys to be used in the PARTITION BY. Returns: The newly created PyDough PARTITION BY instance. """ - return PartitionBy(preceding_context, child, name) + return PartitionBy(preceding_context, child, name, keys) def build_child_reference_collection( self, diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index e6fdf3e44..8179436c0 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -19,7 +19,6 @@ ) from pydough.qdag import ( AstNodeBuilder, - Calculate, ChildOperatorChildAccess, ChildReferenceExpression, CollationExpression, @@ -27,7 +26,6 @@ ExpressionFunctionCall, GlobalContext, Literal, - OrderBy, PartitionBy, PyDoughCollectionQDAG, PyDoughExpressionQDAG, @@ -35,8 +33,6 @@ Reference, SidedReference, SubCollection, - TopK, - Where, WindowCall, ) from pydough.types import PyDoughType @@ -673,8 +669,7 @@ def qualify_calculate( qualified_term = self.qualify_expression(term, qualified_parent, children) qualified_terms.append((name, qualified_term)) # Use the qualified children & terms to create a new CALCULATE node. 
- calculate: Calculate = self.builder.build_calculate(qualified_parent, children) - return calculate.with_terms(qualified_terms) + return self.builder.build_calculate(qualified_parent, children, qualified_terms) def qualify_where( self, @@ -715,8 +710,7 @@ def qualify_where( unqualified_cond, qualified_parent, children ) # Use the qualified children & condition to create a new WHERE node. - where: Where = self.builder.build_where(qualified_parent, children) - return where.with_condition(qualified_cond) + return self.builder.build_where(qualified_parent, children, qualified_cond) def _expressions_to_collations( self, terms: Iterable[UnqualifiedNode] | list[UnqualifiedNode] @@ -801,8 +795,9 @@ def qualify_order_by( raise PyDoughUnqualifiedException( "ORDER BY requires a 'by' clause to be specified." ) - orderby: OrderBy = self.builder.build_order(qualified_parent, children) - return orderby.with_collation(qualified_collations) + return self.builder.build_order( + qualified_parent, children, qualified_collations + ) def qualify_top_k( self, @@ -858,10 +853,9 @@ def qualify_top_k( "TopK requires a 'by' clause to be specified." ) # Use the qualified children & collation to create a new TOP K node. - topk: TopK = self.builder.build_top_k( - qualified_parent, children, records_to_keep + return self.builder.build_top_k( + qualified_parent, children, records_to_keep, qualified_collations ) - return topk.with_collation(qualified_collations) def split_partition_ancestry( self, node: UnqualifiedNode, partition_ancestor: str | None = None @@ -1036,9 +1030,8 @@ def qualify_partition( child_references.append(child_ref) # Use the qualified child & keys to create a new PARTITION node. 
partition: PartitionBy = self.builder.build_partition( - qualified_parent, qualified_child, child_name + qualified_parent, qualified_child, child_name, child_references ) - partition = partition.with_keys(child_references) # Special case: if accessing as a child, wrap in a # ChildOperatorChildAccess term. if isinstance(unqualified_parent, UnqualifiedRoot) and is_child: @@ -1076,9 +1069,7 @@ def qualify_collection( unqualified, context, [], is_child, is_cross ) if not isinstance(answer, PyDoughCollectionQDAG): - raise PyDoughUnqualifiedException( - f"Expected a collection, but received an expression: {answer}" - ) + raise pydough.active_session.error_builder.expected_collection(answer) return answer def qualify_expression( @@ -1109,9 +1100,7 @@ def qualify_expression( unqualified, context, children, True, False ) if not isinstance(answer, PyDoughExpressionQDAG): - raise PyDoughUnqualifiedException( - f"Expected an expression, but received a collection: {answer}" - ) + raise pydough.active_session.error_builder.expected_expression(answer) return answer def qualify_singular( @@ -1193,8 +1182,8 @@ def qualify_best( # Build the final expanded window-based filter qualified_child: PyDoughCollectionQDAG = self.builder.build_where( - qualified_parent, children - ).with_condition(qualified_cond) + qualified_parent, children, qualified_cond + ) # Extract the `levels` argument from the condition assert isinstance(qualified_cond, ExpressionFunctionCall) diff --git a/pydough/unqualified/unqualified_transform.py b/pydough/unqualified/unqualified_transform.py index 3cea3782e..d5a2d9150 100644 --- a/pydough/unqualified/unqualified_transform.py +++ b/pydough/unqualified/unqualified_transform.py @@ -10,9 +10,9 @@ import types from typing import Any +from pydough.errors import PyDoughUnqualifiedException from pydough.metadata import GraphMetadata -from .errors import PyDoughUnqualifiedException from .unqualified_node import UnqualifiedNode diff --git a/tests/test_exploration.py 
b/tests/test_exploration.py index e9adc64a8..e74e11ef1 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1161,7 +1161,7 @@ def test_graph_structure( ├─┬─ Partition[name='part_types', by=part_type] │ └─┬─ AccessChild │ └─── TableCollection[parts] - ├─┬─ Calculate[part_type=part_type, avg_price=AVG($1.retail_price)] + ├─┬─ Calculate[avg_price=AVG($1.retail_price)] │ └─┬─ AccessChild │ └─── PartitionChild[parts] └─┬─ Where[avg_price >= 27.5] diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 56a44feb8..539224e92 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -143,6 +143,7 @@ simple_cross_10, simple_cross_11, simple_cross_12, + simple_cross_13, simple_filter_top_five, simple_int_float_string_cast, simple_scan, @@ -2234,6 +2235,15 @@ ), id="simple_cross_12", ), + pytest.param( + PyDoughPandasTest( + simple_cross_13, + "TPCH", + lambda: pd.DataFrame({"n1": [142], "n2": [8]}), + "simple_cross_13", + ), + id="simple_cross_13", + ), pytest.param( PyDoughPandasTest( simple_var_std, diff --git a/tests/test_plan_refsols/simple_cross_13.txt b/tests/test_plan_refsols/simple_cross_13.txt new file mode 100644 index 000000000..de0ad16f7 --- /dev/null +++ b/tests/test_plan_refsols/simple_cross_13.txt @@ -0,0 +1,14 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1)], orderings=[]) + JOIN(condition=True:bool, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_acctbal <= min_balance + 10.0:numeric, columns={}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'min_balance': t1.min_balance}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'min_balance': MIN(c_acctbal)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) + FILTER(condition=s_acctbal >= max_balance - 10.0:numeric, columns={}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'max_balance': t1.max_balance, 's_acctbal': t0.s_acctbal}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) + AGGREGATE(keys={}, aggregations={'max_balance': MAX(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) diff --git a/tests/test_pydough_functions/exploration_examples.py b/tests/test_pydough_functions/exploration_examples.py index 39d3abd12..4eb978303 100644 --- a/tests/test_pydough_functions/exploration_examples.py +++ b/tests/test_pydough_functions/exploration_examples.py @@ -106,10 +106,7 @@ def partition_impl() -> UnqualifiedNode: def partition_child_impl() -> UnqualifiedNode: return ( parts.PARTITION(name="part_types", by=part_type) - .CALCULATE( - part_type, - avg_price=AVG(parts.retail_price), - ) + .CALCULATE(avg_price=AVG(parts.retail_price)) .WHERE(avg_price >= 27.5) .parts ) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 6fc5248d8..4a8aeef6e 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -2970,6 +2970,17 @@ def simple_cross_12(): ) +def simple_cross_13(): + # Strange way to count how many customers have the an account balance + # within 10 of the global minimum, and how many suppliers have an account + # balance within 10 of the global maximum. 
+ glob1 = TPCH.CALCULATE(min_balance=MIN(customers.account_balance)) + cust = customers.WHERE(account_balance <= (CROSS(glob1).min_balance + 10.0)) + glob2 = TPCH.CALCULATE(max_balance=MAX(suppliers.account_balance)) + supp = suppliers.WHERE(account_balance >= (CROSS(glob2).max_balance - 10.0)) + return TPCH.CALCULATE(n1=COUNT(cust), n2=COUNT(supp)) + + def quantile_function_test_1(): selected_orders = customers.orders.WHERE(YEAR(order_date) == 1998) return TPCH.CALCULATE( diff --git a/tests/test_qdag_collection.py b/tests/test_qdag_collection.py index 600102ae7..8e38f601b 100644 --- a/tests/test_qdag_collection.py +++ b/tests/test_qdag_collection.py @@ -548,7 +548,6 @@ def region_intra_pct() -> tuple[CollectionTestInfo, str, str]: ) ** CalculateInfo( [SubCollectionInfo("parts")], - container=ReferenceInfo("container"), total_price=FunctionInfo( "SUM", [ChildReferenceExpressionInfo("retail_price", 0)] ), @@ -1241,7 +1240,6 @@ def test_collections_calc_terms( ) ** CalculateInfo( [SubCollectionInfo("parts")], - container=ReferenceInfo("container"), total_price=FunctionInfo( "SUM", [ChildReferenceExpressionInfo("retail_price", 0)] ), @@ -1259,14 +1257,14 @@ def test_collections_calc_terms( ], ), ), - "TPCH.Partition(parts.ORDER_BY(retail_price.DESC(na_pos='last')), name='containers', by=container).CALCULATE(container=container, total_price=SUM(parts.retail_price)).parts.CALCULATE(part_name=name, container=container, ratio=retail_price / total_price)", + "TPCH.Partition(parts.ORDER_BY(retail_price.DESC(na_pos='last')), name='containers', by=container).CALCULATE(total_price=SUM(parts.retail_price)).parts.CALCULATE(part_name=name, container=container, ratio=retail_price / total_price)", """ ──┬─ TPCH ├─┬─ Partition[name='containers', by=container] │ └─┬─ AccessChild │ ├─── TableCollection[parts] │ └─── OrderBy[retail_price.DESC(na_pos='last')] - └─┬─ Calculate[container=container, total_price=SUM($1.retail_price)] + └─┬─ Calculate[total_price=SUM($1.retail_price)] ├─┬─ 
AccessChild │ └─── PartitionChild[parts] ├─── PartitionChild[parts] @@ -2188,7 +2186,6 @@ def test_collections_to_string( ) ** CalculateInfo( [SubCollectionInfo("parts")], - container=ReferenceInfo("container"), total_price=FunctionInfo( "SUM", [ChildReferenceExpressionInfo("retail_price", 0)] ), diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index d5487aaeb..55b4b592c 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -269,6 +269,30 @@ def bad_pydough_impl_23(root: UnqualifiedNode) -> UnqualifiedNode: ) +def bad_pydough_impl_24(root: UnqualifiedNode) -> UnqualifiedNode: + # Conflict between `name` of nations vs customers + return root.nations.CALCULATE(root.name).customers.CALCULATE(root.name) + + +def bad_pydough_impl_25(root: UnqualifiedNode) -> UnqualifiedNode: + # Conflict between `name` of regions vs customers + return root.regions.CALCULATE(root.name).nations.customers.CALCULATE(root.name) + + +def bad_pydough_impl_26(root: UnqualifiedNode) -> UnqualifiedNode: + # Conflict between `n` of partition vs orders + return ( + root.orders.PARTITION("priorities", by=root.order_priority) + .CALCULATE(key=root.COUNT(root.orders)) + .orders.CALCULATE(root.key) + ) + + +def bad_pydough_impl_27(root: UnqualifiedNode) -> UnqualifiedNode: + # Treating CROSS as singular + return root.regions.CALCULATE(n1=root.name, n2=root.CROSS(root.regions).name) + + def bad_replace_too_many_args(root: UnqualifiedNode) -> UnqualifiedNode: # Too many arguments to replace return root.nations.CALCULATE( @@ -411,6 +435,26 @@ def bad_str_count_few_args(root: UnqualifiedNode) -> UnqualifiedNode: "PyDough nodes POPULATION_STD is not callable. 
Did you mean to use a function?", id="23", ), + pytest.param( + bad_pydough_impl_24, + "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.nations.CALCULATE(name=name).customers", + id="24", + ), + pytest.param( + bad_pydough_impl_25, + "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.regions.CALCULATE(name=name).nations.customers", + id="25", + ), + pytest.param( + bad_pydough_impl_26, + "Unclear whether 'key' refers to a term of the current context or ancestor of collection TPCH.Partition(orders, name='priorities', by=order_priority).CALCULATE(key=COUNT(orders)).orders", + id="26", + ), + pytest.param( + bad_pydough_impl_27, + "Expected all terms in CALCULATE(n1=name, n2=TPCH.regions.name) to be singular, but encountered a plural expression: TPCH.regions.name", + id="27", + ), pytest.param( bad_replace_too_many_args, "Expected between 2 and 3 arguments inclusive, received 4", diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d3c2aef74..ac2274bcd 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -43,18 +43,13 @@ from pydough.pydough_operators import get_operator_by_name from pydough.qdag import ( AstNodeBuilder, - Calculate, ChildOperatorChildAccess, ChildReferenceExpression, CollationExpression, - OrderBy, - PartitionBy, PyDoughCollectionQDAG, PyDoughExpressionQDAG, PyDoughQDAG, Singular, - TopK, - Where, ) from pydough.relational import ( ColumnReference, @@ -630,13 +625,12 @@ def local_build( builder, context, ) - raw_calc: Calculate = builder.build_calculate(context, children) args: list[tuple[str, PyDoughExpressionQDAG]] = [] for name, info in self.args: expr = info.build(builder, context, children) assert isinstance(expr, PyDoughExpressionQDAG) args.append((name, expr)) - return raw_calc.with_terms(args) + return builder.build_calculate(context, children, args) class WhereInfo(ChildOperatorInfo): @@ -666,10 +660,9 @@ def 
local_build( "Must provide a context when building a WHERE clause." ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) - raw_where: Where = builder.build_where(context, children) cond = self.condition.build(builder, context, children) assert isinstance(cond, PyDoughExpressionQDAG) - return raw_where.with_condition(cond) + return builder.build_where(context, children, cond) class SingularInfo(ChildOperatorInfo): @@ -740,13 +733,12 @@ def local_build( "Must provide context and children_contexts when building an ORDER BY clause." ) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) - raw_order: OrderBy = builder.build_order(context, children) collation: list[CollationExpression] = [] for info, asc, na_last in self.collation: expr = info.build(builder, context, children) assert isinstance(expr, PyDoughExpressionQDAG) collation.append(CollationExpression(expr, asc, na_last)) - return raw_order.with_collation(collation) + return builder.build_order(context, children, collation) class TopKInfo(ChildOperatorInfo): @@ -789,13 +781,12 @@ def local_build( "Must provide context and children_contexts when building a TOPK clause." 
) children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) - raw_top_k: TopK = builder.build_top_k(context, children, self.records_to_keep) collation: list[CollationExpression] = [] for info, asc, na_last in self.collation: expr = info.build(builder, context, children) assert isinstance(expr, PyDoughExpressionQDAG) collation.append(CollationExpression(expr, asc, na_last)) - return raw_top_k.with_collation(collation) + return builder.build_top_k(context, children, self.records_to_keep, collation) class PartitionInfo(ChildOperatorInfo): @@ -830,15 +821,12 @@ def local_build( context = builder.build_global_context() children: list[PyDoughCollectionQDAG] = self.build_children(builder, context) assert len(children) == 1 - raw_partition: PartitionBy = builder.build_partition( - context, children[0], self.name - ) keys: list[ChildReferenceExpression] = [] for info in self.keys: expr = info.build(builder, context, children) assert isinstance(expr, ChildReferenceExpression) keys.append(expr) - return raw_partition.with_keys(keys) + return builder.build_partition(context, children[0], self.name, keys) def make_relational_column_reference( From e812143cd0fb13897669fe57999b1ce6c88b9831 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 17 Jul 2025 22:33:14 -0400 Subject: [PATCH 39/97] Overhaul test_qualify_error --- tests/test_qualification_errors.py | 439 +++++------------------------ 1 file changed, 73 insertions(+), 366 deletions(-) diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index 55b4b592c..8446904a0 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -4,7 +4,6 @@ """ import re -from collections.abc import Callable import pytest @@ -13,7 +12,6 @@ from pydough.metadata import GraphMetadata from pydough.unqualified import ( UnqualifiedNode, - UnqualifiedRoot, qualify_node, ) from tests.testing_utilities import ( @@ -21,477 +19,186 @@ ) -def bad_pydough_impl_01(root: 
UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.CALCULATE(nation_name=name, total_balance=SUM(account_balance)) - ``` - The problem: there is no property `account_balance` to be accessed from nations. - """ - return root.nations.CALCULATE( - nation_name=root.name, total_balance=root.SUM(root.account_balance) - ) - - -def bad_pydough_impl_02(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.CALCULATE(nation_name=FIZZBUZZ(name)) - ``` - The problem: there is no function named FIZZBUZZ, so this looks like a - CALCULATE being done onto a subcollection, which cannot be used as an - expression inside a CALCULATE. - """ - return root.nations.CALCULATE(nation_name=root.FIZZBUZZ(root.name)) - - -def bad_pydough_impl_03(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.CALCULATE(y=suppliers.CALCULATE(x=COUNT(supply_records)).x) - ``` - The problem: `suppliers.CALCULATE(x=COUNT(supply_records))` is plural with regards - to nations, so accessing its `x` property is still plural, therefore it - cannot be used as a term inside a CALCULATE from the context of nations. - """ - return root.nations.CALCULATE( - y=root.suppliers.CALCULATE(x=root.COUNT(root.supply_records)).x - ) - - -def bad_pydough_impl_04(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.nations.name.hello - ``` - The problem: nations.name is an expression, so invoking `.hello` on it is - not valid. 
- """ - return root.nations.name.hello - - -def bad_pydough_impl_05(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.Customer(r=nation.region) - ``` - The problem: nation.region is a collection, therefore cannot be used as - an expression in a CALCULATE. - """ - return root.customers.CALCULATE(r=root.nation.region) - - -def bad_pydough_impl_06(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.suppliers.supply_records.CALCULATE(o=lines.order.order_date) - ``` - The problem: lines is plural with regards to supply_records, therefore - lines.order.order_date is also plural and it cannot be used in a CALCULATE - in the context of supply_records. - """ - return root.suppliers.supply_records.CALCULATE(o=root.lines.order.order_date) - - -def bad_pydough_impl_07(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.lines.CALCULATE(v=MUL(extended_price, SUB(1, discount))) - ``` - The problem: there is no function named MUL or SUB, so this looks like a - CALCULATE operation on a subcollection, which cannot be used as an - expression inside of a CALCULATE. - """ - return root.lines.CALCULATE( - v=root.MUL(root.extended_price, root.SUB(1, root.discount)) - ) - - -def bad_pydough_impl_08(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - TPCH.lines.tax = 0 - TPCH.lines.CALCULATE(value=extended_price * tax) - ``` - The problem: writing to an unqualified node is not yet supported. 
- """ - root.lines.tax = 0 - return root.lines.CALCULATE(value=root.extended_price * root.tax) - - -def bad_pydough_impl_09(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - best_customer = nations.customers.BEST(per='nations', by=account_balance.DESC()) - regions.CALCULATE(n=best_customer.name) - ``` - The problem: The cardinality is off since even though the `BEST` ensures - the customers are singular with regards to the nation, the nations are - still plural with regards to the region. - """ - best_customer = root.nations.customers.BEST( - per="nations", by=root.account_balance.DESC() - ) - return root.regions.CALCULATE(n=best_customer.name) - - -def bad_pydough_impl_10(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), allow_ties=True) - regions.CALCULATE(n=best_customer.name) - ``` - The problem: the presence of `allow_ties=True` means that the `BEST` - operator does not guarantee `nations.customers` is plural with regards to - `regions`. - """ - best_customer = root.nations.customers.BEST( - per="regions", by=root.account_balance.DESC(), allow_ties=True - ) - return root.regions.CALCULATE(n=best_customer.name) - - -def bad_pydough_impl_11(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3) - regions.CALCULATE(n=best_customer.name) - ``` - The problem: the presence of `n_best=3` means that the `BEST` operator - does not guarantee `nations.customers` is plural with regards to `regions`. 
- """ - best_customer = root.nations.customers.BEST( - per="regions", by=root.account_balance.DESC(), allow_ties=True - ) - return root.regions.CALCULATE(n=best_customer.name) - - -def bad_pydough_impl_12(root: UnqualifiedNode) -> UnqualifiedNode: - """ - Creates an UnqualifiedNode for the following invalid PyDough snippet: - ``` - regions.nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3, allow_ties=True) - ``` - The problem: cannot simultaneously use `n_best=3` and `allow_ties=True`. - """ - return root.regions.nations.customers.BEST( - per="regions", by=root.account_balance.DESC(), n_best=3, allow_ties=True - ) - - -def bad_pydough_impl_13(root: UnqualifiedNode) -> UnqualifiedNode: - # Non-existent per name - return root.customers.orders.CALCULATE(root.RANKING(by=root.key.ASC(), per="custs")) - - -def bad_pydough_impl_14(root: UnqualifiedNode) -> UnqualifiedNode: - # Bad index of valid per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:2") - ) - - -def bad_pydough_impl_15(root: UnqualifiedNode) -> UnqualifiedNode: - # Ambiguous per name - return root.customers.orders.customer.orders.lines.CALCULATE( - root.RANKING(by=root.extended_price.DESC(), per="orders") - ) - - -def bad_pydough_impl_16(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:k") - ) - - -def bad_pydough_impl_17(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:1:2") - ) - - -def bad_pydough_impl_18(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - root.RANKING(by=root.key.ASC(), per="customers:") - ) - - -def bad_pydough_impl_19(root: UnqualifiedNode) -> UnqualifiedNode: - # Malformed per name - return root.customers.orders.CALCULATE( - 
root.RANKING(by=root.key.ASC(), per="customers:0") - ) - - -def bad_pydough_impl_20(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - var=root.SAMPLE_VAR(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_21(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - var=root.SAMPLE_VARIANCE(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_22(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - var=root.SAMPLE_STD(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_23(root: UnqualifiedNode) -> UnqualifiedNode: - # Internal function name - return root.Nations.CALCULATE( - name=root.name, - std=root.POPULATION_STD(root.suppliers.account_balance), - ) - - -def bad_pydough_impl_24(root: UnqualifiedNode) -> UnqualifiedNode: - # Conflict between `name` of nations vs customers - return root.nations.CALCULATE(root.name).customers.CALCULATE(root.name) - - -def bad_pydough_impl_25(root: UnqualifiedNode) -> UnqualifiedNode: - # Conflict between `name` of regions vs customers - return root.regions.CALCULATE(root.name).nations.customers.CALCULATE(root.name) - - -def bad_pydough_impl_26(root: UnqualifiedNode) -> UnqualifiedNode: - # Conflict between `n` of partition vs orders - return ( - root.orders.PARTITION("priorities", by=root.order_priority) - .CALCULATE(key=root.COUNT(root.orders)) - .orders.CALCULATE(root.key) - ) - - -def bad_pydough_impl_27(root: UnqualifiedNode) -> UnqualifiedNode: - # Treating CROSS as singular - return root.regions.CALCULATE(n1=root.name, n2=root.CROSS(root.regions).name) - - -def bad_replace_too_many_args(root: UnqualifiedNode) -> UnqualifiedNode: - # Too many arguments to replace - return root.nations.CALCULATE( - replace_name1=root.REPLACE(root.name, "a", "b", "c"), - ) - - -def bad_replace_few_args(root: 
UnqualifiedNode) -> UnqualifiedNode: - # Not enough arguments to replace - return root.nations.CALCULATE(replace_name2=root.REPLACE("a")) - - -def bad_str_count_too_many_args(root: UnqualifiedNode) -> UnqualifiedNode: - # Too many arguments to str_count - return root.nations.CALCULATE( - str_count1=root.STRCOUNT(root.name, "a", "b"), - ) - - -def bad_str_count_few_args(root: UnqualifiedNode) -> UnqualifiedNode: - # Not enough arguments to str_count - return root.nations.CALCULATE(str_count2=root.STRCOUNT(root.name)) - - @pytest.mark.parametrize( - "impl, error_msg", + "pydough_text, error_msg", [ pytest.param( - bad_pydough_impl_01, + "result = nations.CALCULATE(nation_name=name, total_balance=SUM(account_balance))", "Unrecognized term of TPCH.nations: 'account_balance'. Did you mean: comment, customers, name, region_key, suppliers, region?", - id="01", + id="bad_name", ), pytest.param( - bad_pydough_impl_02, + "result = nations.CALCULATE(nation_name=FIZZBUZZ(name))", "PyDough nodes FIZZBUZZ is not callable. 
Did you mean to use a function?", - id="02", + id="non_function", ), pytest.param( - bad_pydough_impl_03, + "result = nations.CALCULATE(y=suppliers.CALCULATE(x=COUNT(supply_records)).x)", "Expected all terms in CALCULATE(y=suppliers.CALCULATE(x=COUNT(supply_records)).x) to be singular, but encountered a plural expression: suppliers.CALCULATE(x=COUNT(supply_records)).x", - id="03", + id="bad_plural_1", ), pytest.param( - bad_pydough_impl_04, + "result = TPCH.nations.name.hello", "Expected a collection, but received an expression: TPCH.nations.name", - id="04", + id="expression_instead_of_collection", ), pytest.param( - bad_pydough_impl_05, + "result = customers.CALCULATE(r=nation.region)", "Expected an expression, but received a collection: nation.region", - id="05", + id="collection_instead_of_expression", ), pytest.param( - bad_pydough_impl_06, + "result = suppliers.supply_records.CALCULATE(o=lines.order.order_date)", "Expected all terms in CALCULATE(o=lines.order.order_date) to be singular, but encountered a plural expression: lines.order.order_date", - id="06", + id="bad_plural_2", ), pytest.param( - bad_pydough_impl_07, + "lines.CALCULATE(v=MUL(extended_price, SUB(1, discount)))", "PyDough nodes SUB is not callable. 
Did you mean to use a function?", - id="07", + id="binop_function_call", ), pytest.param( - bad_pydough_impl_08, + "TPCH.lines.tax = 0", "PyDough objects do not yet support writing properties to them.", - id="08", + id="setattr", ), pytest.param( - bad_pydough_impl_09, + "best_customer = nations.customers.BEST(per='nations', by=account_balance.DESC())\n" + "result = regions.CALCULATE(n=best_customer.name)", "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=1, allow_ties=False) == 1).name) to be singular, but encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=1, allow_ties=False) == 1).name", - id="09", + id="bad_best_1", ), pytest.param( - bad_pydough_impl_10, + "best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), allow_ties=True)\n" + "result = regions.CALCULATE(n=best_customer.name)", "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name) to be singular, but encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name", - id="10", + id="bad_best_2", ), pytest.param( - bad_pydough_impl_11, - "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name) to be singular, but encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=True) == 1).name", - id="11", + "best_customer = nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3)\n" + "result = regions.CALCULATE(n=best_customer.name)", + "Expected all terms in CALCULATE(n=nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=False) <= 3).name) to be singular, but 
encountered a plural expression: nations.customers.WHERE(RANKING(by=(account_balance.DESC(na_pos='last')), levels=2, allow_ties=False) <= 3).name", + id="bad_best_3", ), pytest.param( - bad_pydough_impl_12, + "result = regions.nations.customers.BEST(per='regions', by=account_balance.DESC(), n_best=3, allow_ties=True)", "Cannot allow ties when multiple best values are requested", - id="12", + id="bad_best_4", ), pytest.param( - bad_pydough_impl_13, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='custs'))", "Per string refers to unrecognized ancestor 'custs' of TPCH.customers.orders", - id="13", + id="bad_per_1", ), pytest.param( - bad_pydough_impl_14, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:2'))", "Per string 'customers:2' invalid as there are not 2 ancestors of the current context with name 'customers'.", - id="14", + id="bad_per_2", ), pytest.param( - bad_pydough_impl_15, + "result = customers.orders.customer.orders.lines.CALCULATE(RANKING(by=extended_price.DESC(), per='orders'))", "Per string 'orders' is ambiguous for TPCH.customers.orders.customer.orders.lines. 
Use the form 'orders:index' to disambiguate, where 'orders:1' refers to the most recent ancestor.", - id="15", + id="bad_per_3", ), pytest.param( - bad_pydough_impl_16, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:k'))", "Malformed per string: 'customers:k' (expected the index after ':' to be a positive integer)", - id="16", + id="bad_per_4", ), pytest.param( - bad_pydough_impl_17, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:1:2'))", "Malformed per string: 'customers:1:2' (expected 0 or 1 ':', found 2)", - id="17", + id="bad_per_5", ), pytest.param( - bad_pydough_impl_18, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:'))", "Malformed per string: 'customers:' (expected the index after ':' to be a positive integer)", - id="18", + id="bad_per_6", ), pytest.param( - bad_pydough_impl_19, + "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:0'))", "Malformed per string: 'customers:0' (expected the index after ':' to be a positive integer)", - id="19", + id="bad_per_7", ), pytest.param( - bad_pydough_impl_20, + "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", "PyDough nodes SAMPLE_VAR is not callable. Did you mean to use a function?", - id="20", + id="kwargfunc_1", ), pytest.param( - bad_pydough_impl_21, + "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", "PyDough nodes SAMPLE_VARIANCE is not callable. Did you mean to use a function?", - id="21", + id="kwargfunc_2", ), pytest.param( - bad_pydough_impl_22, + "result = nations.CALCULATE(name=name, var=SAMPLE_STD(suppliers.account_balance))", "PyDough nodes SAMPLE_STD is not callable. Did you mean to use a function?", - id="22", + id="kwargfunc_3", ), pytest.param( - bad_pydough_impl_23, + "result = nations.CALCULATE(name=name, std=POPULATION_STD(suppliers.account_balance))", "PyDough nodes POPULATION_STD is not callable. 
Did you mean to use a function?", - id="23", + id="kwargfunc_4", ), pytest.param( - bad_pydough_impl_24, + "result = nations.CALCULATE(name).customers.CALCULATE(name)", "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.nations.CALCULATE(name=name).customers", - id="24", + id="downstream_1", ), pytest.param( - bad_pydough_impl_25, + "result = regions.CALCULATE(name).nations.customers.CALCULATE(name)", "Unclear whether 'name' refers to a term of the current context or ancestor of collection TPCH.regions.CALCULATE(name=name).nations.customers", - id="25", + id="downstream_2", ), pytest.param( - bad_pydough_impl_26, + "result = orders.PARTITION(name='priorities', by=order_priority).CALCULATE(key=COUNT(orders)).orders.CALCULATE(key)", "Unclear whether 'key' refers to a term of the current context or ancestor of collection TPCH.Partition(orders, name='priorities', by=order_priority).CALCULATE(key=COUNT(orders)).orders", - id="26", + id="downstream_3", ), pytest.param( - bad_pydough_impl_27, + "result = regions.CALCULATE(n1=name, n2=CROSS(regions).name)", "Expected all terms in CALCULATE(n1=name, n2=TPCH.regions.name) to be singular, but encountered a plural expression: TPCH.regions.name", - id="27", + id="plural_cross", ), pytest.param( - bad_replace_too_many_args, + "result = nations.CALCULATE(replace_name1=REPLACE(name, 'a', 'b', 'c'))", "Expected between 2 and 3 arguments inclusive, received 4", id="bad_replace_too_many_args", ), pytest.param( - bad_replace_few_args, + "result = nations.CALCULATE(replace_name2=REPLACE('a'))", "Expected between 2 and 3 arguments inclusive, received 1", id="bad_replace_few_args", ), pytest.param( - bad_str_count_too_many_args, + "result = nations.CALCULATE(str_count1=STRCOUNT(name, 'a', 'b'))", "Expected 2 arguments, received 3", id="bad_str_count_too_many_args", ), pytest.param( - bad_str_count_few_args, + "result = nations.CALCULATE(str_count2=STRCOUNT(name))", "Expected 2 arguments, 
received 1", id="bad_str_count_few_args", ), ], ) def test_qualify_error( - impl: Callable[[UnqualifiedNode], UnqualifiedNode], + pydough_text: str, error_msg: str, get_sample_graph: graph_fetcher, ) -> None: """ - Tests that strings representing the setup of PyDough unqualified objects - (with unknown variables already pre-pended with `_ROOT.`) are correctly - transformed into UnqualifiedNode objects with an expected string - representation. Each `pydough_str` should be called with `exec` to define - a variable `answer` that is an `UnqualifiedNode` instance. + Tests that the qualification process correctly raises the expected error + messages when the PyDough text is invalid. Takes in the PyDough text and + converts it to unqualified nodes with `from_string`, then qualifies it to + ensure that the error is raised as expected. The PyDough text can be 1 or + multiple lines, but must end with storing the answers in a variable + called `result`. """ graph: GraphMetadata = get_sample_graph("TPCH") - root: UnqualifiedNode = UnqualifiedRoot(graph) default_config: PyDoughConfigs = pydough.active_session.config with pytest.raises(Exception, match=re.escape(error_msg)): - unqualified: UnqualifiedNode = impl(root) + unqualified: UnqualifiedNode = pydough.from_string( + pydough_text, answer_variable="result", metadata=graph + ) qualify_node(unqualified, graph, default_config) From 993553f480c9bb571214a49e4dc9090582fe5b47 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 14:35:40 -0400 Subject: [PATCH 40/97] Moving more errors --- .../database_connectors/database_connector.py | 7 ++- pydough/errors/pydough_error_builder.py | 50 ++++++++++++++++++- pydough/sqlglot/execute_relational.py | 6 ++- .../sqlglot_relational_expression_visitor.py | 12 +++-- 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index c9eb8bd7c..e16426b1e 100644 --- 
a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -11,7 +11,8 @@ import pandas as pd -from pydough.errors import PyDoughSessionException, PyDoughSQLException +import pydough +from pydough.errors import PyDoughSessionException from .db_types import DBConnection, DBCursor @@ -49,7 +50,9 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: cursor.execute(sql) except Exception as e: print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") - raise PyDoughSQLException(*e.args) from e + raise pydough.active_session.error_builder.sql_runtime_failure( + sql, e, True + ) from e column_names: list[str] = [description[0] for description in cursor.description] # No need to close the cursor, as its closed by del. # TODO: (gh #174) Cache the cursor? diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index 33c093c4b..0a560b520 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -4,11 +4,12 @@ from typing import TYPE_CHECKING -from pydough.errors import PyDoughException, PyDoughQDAGException +from pydough.errors import PyDoughException, PyDoughQDAGException, PyDoughSQLException if TYPE_CHECKING: from pydough.pydough_operators import PyDoughOperator from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG + from pydough.relational import CallExpression class PyDoughErrorBuilder: @@ -182,3 +183,50 @@ def bad_columns(self, columns: object) -> PyDoughException: return PyDoughQDAGException( f"Expected `columns` argument to be a list or dictionary, found {columns.__class__.__name__}" ) + + def sql_runtime_failure( + self, sql: str, error: Exception, execute: bool + ) -> PyDoughException: + """ + Creates an exception for when a SQL query fails to execute at runtime + or optimization. + + Args: + `sql`: The SQL query that failed. + `error`: The exception raised during the SQL execution or + optimization. 
+ `execute`: Whether the failure occurred during execution (True) or + optimization (False). + + Returns: + An exception indicating the SQL runtime/optimization failure. + """ + if execute: + return PyDoughSQLException( + "SQL query execution failed. Please check the query syntax and database connection:\n" + f"{sql}\nError: {error}" + ) + else: + return PyDoughSQLException( + "SQL query optimization failed. Please check the query syntax:\n" + f"{sql}\nError: {error}" + ) + + def sql_call_conversion_error( + self, call: "CallExpression", error: Exception + ) -> PyDoughException: + """ + Creates an exception for when the conversion of a call expression from + Relational to SQL fails. + + Args: + `call`: The relational function call expression that + failed to convert. + `error`: The exception raised during the conversion. + + Returns: + An exception indicating the SQL call conversion failure. + """ + return PyDoughQDAGException( + f"Failed to convert expression {call.to_string(True)} to SQL: {error}" + ) diff --git a/pydough/sqlglot/execute_relational.py b/pydough/sqlglot/execute_relational.py index a4d5801ac..281e5859a 100644 --- a/pydough/sqlglot/execute_relational.py +++ b/pydough/sqlglot/execute_relational.py @@ -23,12 +23,12 @@ from sqlglot.optimizer.simplify import simplify from sqlglot.optimizer.unnest_subqueries import unnest_subqueries +import pydough from pydough.configs import PyDoughConfigs from pydough.database_connectors import ( DatabaseContext, DatabaseDialect, ) -from pydough.errors import PyDoughSQLException from pydough.logger import get_logger from pydough.relational import RelationalRoot from pydough.relational.relational_expressions import ( @@ -69,7 +69,9 @@ def convert_relation_to_sql( except SqlglotError as e: sql_text: str = glot_expr.sql(sqlglot_dialect, pretty=True) print(f"ERROR WHILE OPTIMIZING QUERY:\n{sql_text}") - raise PyDoughSQLException(*e.args) + raise pydough.active_session.error_builder.sql_runtime_failure( + sql_text, e, False 
+ ) from e # Convert the optimized AST back to a SQL string. return glot_expr.sql(sqlglot_dialect, pretty=True) diff --git a/pydough/sqlglot/sqlglot_relational_expression_visitor.py b/pydough/sqlglot/sqlglot_relational_expression_visitor.py index da8c8b09d..343ed6ab7 100644 --- a/pydough/sqlglot/sqlglot_relational_expression_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_expression_visitor.py @@ -14,6 +14,7 @@ from sqlglot.expressions import Identifier from sqlglot.expressions import Star as SQLGlotStar +import pydough import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs from pydough.database_connectors import DatabaseDialect @@ -76,9 +77,14 @@ def visit_call_expression(self, call_expression: CallExpression) -> None: input_types: list[PyDoughType] = [ arg.data_type for arg in call_expression.inputs ] - output_expr: SQLGlotExpression = self._bindings.convert_call_to_sqlglot( - call_expression.op, input_exprs, input_types - ) + try: + output_expr: SQLGlotExpression = self._bindings.convert_call_to_sqlglot( + call_expression.op, input_exprs, input_types + ) + except Exception as e: + raise pydough.active_session.error_builder.sql_call_conversion_error( + call_expression, e + ) self._stack.append(output_expr) @staticmethod From 85e7c8fd57ec106f4f4ee0686eb753da4c73bc82 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 18:25:55 -0400 Subject: [PATCH 41/97] Minor adjustment to how pagerank was written --- tests/test_plan_refsols/pagerank_a0.txt | 10 +-- tests/test_plan_refsols/pagerank_a1.txt | 16 ++--- tests/test_plan_refsols/pagerank_a2.txt | 24 +++---- tests/test_plan_refsols/pagerank_a6.txt | 56 +++++++-------- tests/test_plan_refsols/pagerank_b3.txt | 32 ++++----- tests/test_plan_refsols/pagerank_c4.txt | 40 +++++------ tests/test_plan_refsols/pagerank_d5.txt | 48 ++++++------- tests/test_plan_refsols/pagerank_h8.txt | 72 +++++++++---------- .../simple_pydough_functions.py | 9 ++- 
tests/test_sql_refsols/pagerank_a0_sqlite.sql | 16 ++--- tests/test_sql_refsols/pagerank_a1_sqlite.sql | 11 ++- tests/test_sql_refsols/pagerank_a2_sqlite.sql | 17 ++--- tests/test_sql_refsols/pagerank_a6_sqlite.sql | 41 ++++------- tests/test_sql_refsols/pagerank_b3_sqlite.sql | 23 +++--- tests/test_sql_refsols/pagerank_c4_sqlite.sql | 29 +++----- tests/test_sql_refsols/pagerank_d5_sqlite.sql | 35 ++++----- tests/test_sql_refsols/pagerank_h8_sqlite.sql | 53 +++++--------- 17 files changed, 238 insertions(+), 294 deletions(-) diff --git a/tests/test_plan_refsols/pagerank_a0.txt b/tests/test_plan_refsols/pagerank_a0.txt index 89355a8da..d7e58adbe 100644 --- a/tests/test_plan_refsols/pagerank_a0.txt +++ b/tests/test_plan_refsols/pagerank_a0.txt @@ -1,7 +1,3 @@ -ROOT(columns=[('key', anything_s_key), ('page_rank', page_rank_0)], orderings=[(anything_s_key):asc_first]) - PROJECT(columns={'anything_s_key': anything_s_key, 'page_rank_0': ROUND(1.0:numeric / anything_agg_2, 5:numeric)}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_agg_2': ANYTHING(agg_2), 'anything_s_key': ANYTHING(s_key)}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'agg_2': t0.agg_2, 's_key': t0.s_key}) - PROJECT(columns={'agg_2': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) - SCAN(table=main.SITES, columns={'s_key': s_key}) - SCAN(table=main.LINKS, columns={'l_source': l_source}) +ROOT(columns=[('key', s_key), ('page_rank', page_rank_0)], orderings=[(s_key):asc_first]) + PROJECT(columns={'page_rank_0': ROUND(1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 5:numeric), 's_key': s_key}) + SCAN(table=main.SITES, columns={'s_key': s_key}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index b3053e78b..5d72b1ec3 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,14 +1,14 @@ ROOT(columns=[('key', s_key), ('page_rank', 
page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link, columns={'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link': dummy_link, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 
'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index 16646f254..e2deffe86 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,18 +1,18 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_20, 5:numeric), 's_key': 
s_key}) FILTER(condition=dummy_link_18, columns={'page_rank_0_20': page_rank_0_20, 's_key': s_key}) - PROJECT(columns={'dummy_link_18': dummy_link_18, 'page_rank_0_20': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_19 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t0.consider_link_19, 'dummy_link_18': t0.dummy_link_18, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_19': t1.consider_link_19, 'dummy_link_18': t1.dummy_link_18, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': 
s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_18': dummy_link_18, 'page_rank_0_20': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_19 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_19': t0.consider_link_19, 'damp_modifier': t0.damp_modifier, 'dummy_link_18': t0.dummy_link_18, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_19': t1.consider_link_19, 'damp_modifier': t0.damp_modifier, 'dummy_link_18': t1.dummy_link_18, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index f29c64b32..3bb61fe4e 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,34 +1,34 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_590, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_588, columns={'page_rank_0_590': page_rank_0_590, 's_key': s_key}) - PROJECT(columns={'dummy_link_588': dummy_link_588, 'page_rank_0_590': 
0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_589 * page_rank_0_580 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t0.consider_link_589, 'dummy_link_588': t0.dummy_link_588, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_589': t1.consider_link_589, 'dummy_link_588': t1.dummy_link_588, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580}) - FILTER(condition=dummy_link_578, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_580': page_rank_0_580, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_578': dummy_link_578, 'n_out': n_out, 'page_rank_0_580': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_579 * page_rank_0_570 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t0.consider_link_579, 'dummy_link_578': t0.dummy_link_578, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_579': t1.consider_link_579, 'dummy_link_578': t1.dummy_link_578, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570}) - FILTER(condition=dummy_link_568, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_570': page_rank_0_570, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_568': dummy_link_568, 'n_out': n_out, 'page_rank_0_570': 
0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_569 * page_rank_0_560 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t0.consider_link_569, 'dummy_link_568': t0.dummy_link_568, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_569': t1.consider_link_569, 'dummy_link_568': t1.dummy_link_568, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560}) - FILTER(condition=dummy_link_558, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_560': page_rank_0_560, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_558': dummy_link_558, 'n_out': n_out, 'page_rank_0_560': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_559 * page_rank_0_550 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t0.consider_link_559, 'dummy_link_558': t0.dummy_link_558, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_559': t1.consider_link_559, 'dummy_link_558': t1.dummy_link_558, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550}) - FILTER(condition=dummy_link_548, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_550': page_rank_0_550, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_548': dummy_link_548, 'n_out': n_out, 'page_rank_0_550': 
0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_549 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t0.consider_link_549, 'dummy_link_548': t0.dummy_link_548, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_549': t1.consider_link_549, 'dummy_link_548': t1.dummy_link_548, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': 
IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_588': dummy_link_588, 'page_rank_0_590': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_589 * page_rank_0_580 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_589': t0.consider_link_589, 'damp_modifier': t0.damp_modifier, 'dummy_link_588': t0.dummy_link_588, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_589': t1.consider_link_589, 'damp_modifier': t0.damp_modifier, 'dummy_link_588': t1.dummy_link_588, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_580': t0.page_rank_0_580}) + FILTER(condition=dummy_link_578, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_580': page_rank_0_580, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_578': dummy_link_578, 'n_out': n_out, 'page_rank_0_580': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_579 * page_rank_0_570 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_579': t0.consider_link_579, 'damp_modifier': t0.damp_modifier, 'dummy_link_578': t0.dummy_link_578, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_579': t1.consider_link_579, 'damp_modifier': 
t0.damp_modifier, 'dummy_link_578': t1.dummy_link_578, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_570': t0.page_rank_0_570}) + FILTER(condition=dummy_link_568, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_570': page_rank_0_570, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_568': dummy_link_568, 'n_out': n_out, 'page_rank_0_570': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_569 * page_rank_0_560 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_569': t0.consider_link_569, 'damp_modifier': t0.damp_modifier, 'dummy_link_568': t0.dummy_link_568, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_569': t1.consider_link_569, 'damp_modifier': t0.damp_modifier, 'dummy_link_568': t1.dummy_link_568, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_560': t0.page_rank_0_560}) + FILTER(condition=dummy_link_558, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_560': page_rank_0_560, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_558': dummy_link_558, 'n_out': n_out, 'page_rank_0_560': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_559 * page_rank_0_550 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_559': t0.consider_link_559, 'damp_modifier': t0.damp_modifier, 'dummy_link_558': t0.dummy_link_558, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_559': t1.consider_link_559, 'damp_modifier': 
t0.damp_modifier, 'dummy_link_558': t1.dummy_link_558, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_550': t0.page_rank_0_550}) + FILTER(condition=dummy_link_548, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_550': page_rank_0_550, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_548': dummy_link_548, 'n_out': n_out, 'page_rank_0_550': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_549 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_549': t0.consider_link_549, 'damp_modifier': t0.damp_modifier, 'dummy_link_548': t0.dummy_link_548, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_549': t1.consider_link_549, 'damp_modifier': t0.damp_modifier, 'dummy_link_548': t1.dummy_link_548, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': 
t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index d78322a4d..0b43e4e96 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,22 +1,22 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_58, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_56, columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) - PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t0.consider_link_57, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_57': t1.consider_link_57, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) - FILTER(condition=dummy_link_46, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t0.consider_link_47, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_47': t1.consider_link_47, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_56': dummy_link_56, 'page_rank_0_58': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_57 * page_rank_0_48 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_57': t0.consider_link_57, 'damp_modifier': t0.damp_modifier, 'dummy_link_56': t0.dummy_link_56, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_57': t1.consider_link_57, 'damp_modifier': t0.damp_modifier, 'dummy_link_56': t1.dummy_link_56, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_48': t0.page_rank_0_48}) + FILTER(condition=dummy_link_46, columns={'damp_modifier': damp_modifier, 
'n_out': n_out, 'page_rank_0_48': page_rank_0_48, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_46': dummy_link_46, 'n_out': n_out, 'page_rank_0_48': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_47 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_47': t0.consider_link_47, 'damp_modifier': t0.damp_modifier, 'dummy_link_46': t0.dummy_link_46, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_47': t1.consider_link_47, 'damp_modifier': t0.damp_modifier, 'dummy_link_46': t1.dummy_link_46, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': 
DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index 1c504e853..d8f77fd16 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,26 +1,26 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_134, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_132, columns={'page_rank_0_134': page_rank_0_134, 's_key': s_key}) - PROJECT(columns={'dummy_link_132': dummy_link_132, 'page_rank_0_134': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_133 * page_rank_0_124 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_133': t0.consider_link_133, 'dummy_link_132': t0.dummy_link_132, 'n_out': 
t0.n_out, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_133': t1.consider_link_133, 'dummy_link_132': t1.dummy_link_132, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124}) - FILTER(condition=dummy_link_122, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_124': page_rank_0_124, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_122': dummy_link_122, 'n_out': n_out, 'page_rank_0_124': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_123 * page_rank_0_114 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t0.consider_link_123, 'dummy_link_122': t0.dummy_link_122, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_123': t1.consider_link_123, 'dummy_link_122': t1.dummy_link_122, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114}) - FILTER(condition=dummy_link_112, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_114': page_rank_0_114, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_112': dummy_link_112, 'n_out': n_out, 'page_rank_0_114': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_113 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_113': t0.consider_link_113, 'dummy_link_112': t0.dummy_link_112, 'n_out': 
t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_113': t1.consider_link_113, 'dummy_link_112': t1.dummy_link_112, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_132': dummy_link_132, 
'page_rank_0_134': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_133 * page_rank_0_124 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_133': t0.consider_link_133, 'damp_modifier': t0.damp_modifier, 'dummy_link_132': t0.dummy_link_132, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_133': t1.consider_link_133, 'damp_modifier': t0.damp_modifier, 'dummy_link_132': t1.dummy_link_132, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_124': t0.page_rank_0_124}) + FILTER(condition=dummy_link_122, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_124': page_rank_0_124, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_122': dummy_link_122, 'n_out': n_out, 'page_rank_0_124': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_123 * page_rank_0_114 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_123': t0.consider_link_123, 'damp_modifier': t0.damp_modifier, 'dummy_link_122': t0.dummy_link_122, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_123': t1.consider_link_123, 'damp_modifier': t0.damp_modifier, 'dummy_link_122': t1.dummy_link_122, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_114': t0.page_rank_0_114}) + FILTER(condition=dummy_link_112, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_114': page_rank_0_114, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_112': dummy_link_112, 'n_out': n_out, 
'page_rank_0_114': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_113 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_113': t0.consider_link_113, 'damp_modifier': t0.damp_modifier, 'dummy_link_112': t0.dummy_link_112, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_113': t1.consider_link_113, 'damp_modifier': t0.damp_modifier, 'dummy_link_112': t1.dummy_link_112, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 
'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index fb3b3c239..32c783fd9 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,30 +1,30 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_286, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_284, columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) - PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_285': t0.consider_link_285, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_285': t1.consider_link_285, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) - FILTER(condition=dummy_link_274, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t0.consider_link_275, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_275': t1.consider_link_275, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) - FILTER(condition=dummy_link_264, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_265': t0.consider_link_265, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'anything_n': t0.anything_n, 'consider_link_265': t1.consider_link_265, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) - FILTER(condition=dummy_link_254, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t0.consider_link_255, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_255': t1.consider_link_255, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 
'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_284': dummy_link_284, 'page_rank_0_286': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_285 * page_rank_0_276 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_285': t0.consider_link_285, 'damp_modifier': t0.damp_modifier, 'dummy_link_284': t0.dummy_link_284, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_285': t1.consider_link_285, 'damp_modifier': t0.damp_modifier, 'dummy_link_284': t1.dummy_link_284, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_276': t0.page_rank_0_276}) + FILTER(condition=dummy_link_274, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_276': page_rank_0_276, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_274': dummy_link_274, 'n_out': n_out, 'page_rank_0_276': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_275 * page_rank_0_266 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_275': t0.consider_link_275, 'damp_modifier': t0.damp_modifier, 'dummy_link_274': t0.dummy_link_274, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_275': t1.consider_link_275, 'damp_modifier': t0.damp_modifier, 'dummy_link_274': t1.dummy_link_274, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_266': t0.page_rank_0_266}) + FILTER(condition=dummy_link_264, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_266': page_rank_0_266, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_264': dummy_link_264, 'n_out': n_out, 'page_rank_0_266': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_265 * page_rank_0_256 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_265': t0.consider_link_265, 'damp_modifier': t0.damp_modifier, 'dummy_link_264': t0.dummy_link_264, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_265': t1.consider_link_265, 'damp_modifier': t0.damp_modifier, 'dummy_link_264': t1.dummy_link_264, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_256': t0.page_rank_0_256}) + FILTER(condition=dummy_link_254, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_256': page_rank_0_256, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_254': dummy_link_254, 'n_out': n_out, 'page_rank_0_256': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_255 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_255': t0.consider_link_255, 'damp_modifier': t0.damp_modifier, 'dummy_link_254': t0.dummy_link_254, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_255': t1.consider_link_255, 'damp_modifier': t0.damp_modifier, 'dummy_link_254': t1.dummy_link_254, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != 
l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index 045977af7..e33c88596 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -1,42 +1,42 @@ ROOT(columns=[('key', s_key), ('page_rank', page_rank_1)], orderings=[(s_key):asc_first]) PROJECT(columns={'page_rank_1': ROUND(page_rank_0_2414, 5:numeric), 's_key': s_key}) FILTER(condition=dummy_link_2412, columns={'page_rank_0_2414': page_rank_0_2414, 's_key': s_key}) - PROJECT(columns={'dummy_link_2412': dummy_link_2412, 'page_rank_0_2414': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2413 * page_rank_0_2404 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t0.consider_link_2413, 'dummy_link_2412': t0.dummy_link_2412, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2413': t1.consider_link_2413, 'dummy_link_2412': t1.dummy_link_2412, 'l_target': t1.l_target, 
'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404}) - FILTER(condition=dummy_link_2402, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2402': dummy_link_2402, 'n_out': n_out, 'page_rank_0_2404': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2403 * page_rank_0_2394 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t0.consider_link_2403, 'dummy_link_2402': t0.dummy_link_2402, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2403': t1.consider_link_2403, 'dummy_link_2402': t1.dummy_link_2402, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394}) - FILTER(condition=dummy_link_2392, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2392': dummy_link_2392, 'n_out': n_out, 'page_rank_0_2394': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2393 * page_rank_0_2384 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t0.consider_link_2393, 'dummy_link_2392': t0.dummy_link_2392, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2393': t1.consider_link_2393, 'dummy_link_2392': 
t1.dummy_link_2392, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384}) - FILTER(condition=dummy_link_2382, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2382': dummy_link_2382, 'n_out': n_out, 'page_rank_0_2384': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2383 * page_rank_0_2374 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t0.consider_link_2383, 'dummy_link_2382': t0.dummy_link_2382, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2383': t1.consider_link_2383, 'dummy_link_2382': t1.dummy_link_2382, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374}) - FILTER(condition=dummy_link_2372, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2374': page_rank_0_2374, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2372': dummy_link_2372, 'n_out': n_out, 'page_rank_0_2374': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2373 * page_rank_0_2364 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': t0.consider_link_2373, 'dummy_link_2372': t0.dummy_link_2372, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2373': 
t1.consider_link_2373, 'dummy_link_2372': t1.dummy_link_2372, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364}) - FILTER(condition=dummy_link_2362, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2362': dummy_link_2362, 'n_out': n_out, 'page_rank_0_2364': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2363 * page_rank_0_2354 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t0.consider_link_2363, 'dummy_link_2362': t0.dummy_link_2362, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2363': t1.consider_link_2363, 'dummy_link_2362': t1.dummy_link_2362, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354}) - FILTER(condition=dummy_link_2352, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link_2352': dummy_link_2352, 'n_out': n_out, 'page_rank_0_2354': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link_2353 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link_2353': t0.consider_link_2353, 'dummy_link_2352': t0.dummy_link_2352, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': 
t0.anything_n, 'consider_link_2353': t1.consider_link_2353, 'dummy_link_2352': t1.dummy_link_2352, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) - FILTER(condition=dummy_link, columns={'anything_n': anything_n, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) - PROJECT(columns={'anything_n': anything_n, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': 0.15000000000000002:numeric / anything_n + 0.85:numeric * RELSUM(args=[consider_link * page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t0.consider_link, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 'page_rank': t0.page_rank, 's_key': t1.s_key}) - JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'consider_link': t1.consider_link, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank': t0.page_rank}) - PROJECT(columns={'anything_n': anything_n, 'anything_s_key': anything_s_key, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric), 'page_rank': 1.0:numeric / anything_n}) - AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) - PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 's_key': s_key}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 's_key': t0.s_key}) - PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) + PROJECT(columns={'dummy_link_2412': dummy_link_2412, 'page_rank_0_2414': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2413 * page_rank_0_2404 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2413': t0.consider_link_2413, 'damp_modifier': t0.damp_modifier, 'dummy_link_2412': t0.dummy_link_2412, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2413': t1.consider_link_2413, 'damp_modifier': t0.damp_modifier, 'dummy_link_2412': t1.dummy_link_2412, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2404': t0.page_rank_0_2404}) + FILTER(condition=dummy_link_2402, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2402': dummy_link_2402, 'n_out': n_out, 'page_rank_0_2404': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2403 * page_rank_0_2394 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2403': t0.consider_link_2403, 'damp_modifier': t0.damp_modifier, 'dummy_link_2402': t0.dummy_link_2402, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2403': t1.consider_link_2403, 'damp_modifier': t0.damp_modifier, 'dummy_link_2402': t1.dummy_link_2402, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2394': t0.page_rank_0_2394}) + FILTER(condition=dummy_link_2392, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2392': dummy_link_2392, 'n_out': n_out, 'page_rank_0_2394': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2393 * page_rank_0_2384 / n_out], 
partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2393': t0.consider_link_2393, 'damp_modifier': t0.damp_modifier, 'dummy_link_2392': t0.dummy_link_2392, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2393': t1.consider_link_2393, 'damp_modifier': t0.damp_modifier, 'dummy_link_2392': t1.dummy_link_2392, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2384': t0.page_rank_0_2384}) + FILTER(condition=dummy_link_2382, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2382': dummy_link_2382, 'n_out': n_out, 'page_rank_0_2384': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2383 * page_rank_0_2374 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2383': t0.consider_link_2383, 'damp_modifier': t0.damp_modifier, 'dummy_link_2382': t0.dummy_link_2382, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2383': t1.consider_link_2383, 'damp_modifier': t0.damp_modifier, 'dummy_link_2382': t1.dummy_link_2382, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2374': t0.page_rank_0_2374}) + FILTER(condition=dummy_link_2372, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2374': page_rank_0_2374, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2372': dummy_link_2372, 'n_out': n_out, 'page_rank_0_2374': damp_modifier + 0.85:numeric * 
RELSUM(args=[consider_link_2373 * page_rank_0_2364 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2373': t0.consider_link_2373, 'damp_modifier': t0.damp_modifier, 'dummy_link_2372': t0.dummy_link_2372, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2373': t1.consider_link_2373, 'damp_modifier': t0.damp_modifier, 'dummy_link_2372': t1.dummy_link_2372, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2364': t0.page_rank_0_2364}) + FILTER(condition=dummy_link_2362, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2362': dummy_link_2362, 'n_out': n_out, 'page_rank_0_2364': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2363 * page_rank_0_2354 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2363': t0.consider_link_2363, 'damp_modifier': t0.damp_modifier, 'dummy_link_2362': t0.dummy_link_2362, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2363': t1.consider_link_2363, 'damp_modifier': t0.damp_modifier, 'dummy_link_2362': t1.dummy_link_2362, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0_2354': t0.page_rank_0_2354}) + FILTER(condition=dummy_link_2352, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link_2352': dummy_link_2352, 'n_out': n_out, 
'page_rank_0_2354': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link_2353 * page_rank_0 / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2353': t0.consider_link_2353, 'damp_modifier': t0.damp_modifier, 'dummy_link_2352': t0.dummy_link_2352, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'consider_link_2353': t1.consider_link_2353, 'damp_modifier': t0.damp_modifier, 'dummy_link_2352': t1.dummy_link_2352, 'l_target': t1.l_target, 'n_out': t0.n_out, 'page_rank_0': t0.page_rank_0}) + FILTER(condition=dummy_link, columns={'damp_modifier': damp_modifier, 'n_out': n_out, 'page_rank_0': page_rank_0, 's_key': s_key}) + PROJECT(columns={'damp_modifier': damp_modifier, 'dummy_link': dummy_link, 'n_out': n_out, 'page_rank_0': damp_modifier + 0.85:numeric * RELSUM(args=[consider_link * anything_page_rank / n_out], partition=[s_key], order=[]), 's_key': s_key}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t0.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t0.dummy_link, 'n_out': t0.n_out, 's_key': t1.s_key}) + JOIN(condition=t0.anything_s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_page_rank': t0.anything_page_rank, 'consider_link': t1.consider_link, 'damp_modifier': t0.damp_modifier, 'dummy_link': t1.dummy_link, 'l_target': t1.l_target, 'n_out': t0.n_out}) + PROJECT(columns={'anything_page_rank': anything_page_rank, 'anything_s_key': anything_s_key, 'damp_modifier': 0.15:numeric / anything_n, 'n_out': DEFAULT_TO(sum_n_target, 0:numeric)}) + AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': 
ANYTHING(page_rank), 'anything_s_key': ANYTHING(s_key), 'sum_n_target': SUM(n_target)}) + PROJECT(columns={'n': n, 'n_target': IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)), 'page_rank': page_rank, 's_key': s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) PROJECT(columns={'consider_link': INTEGER(ABSENT(l_target) | l_source != l_target), 'dummy_link': PRESENT(l_target) & l_source == l_target, 'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_pydough_functions/simple_pydough_functions.py b/tests/test_pydough_functions/simple_pydough_functions.py index 5fd4aad86..6ed2db9b9 100644 --- a/tests/test_pydough_functions/simple_pydough_functions.py +++ b/tests/test_pydough_functions/simple_pydough_functions.py @@ -3064,7 +3064,10 @@ def pagerank(n_iters): # The seed value for the PageRank computation, which is evenly distributed. # Also computes the number of sites in the graph & the number of sites each # site links to, which are both used downstream. - source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n, n_out=n_out_expr) + source = sites.CALCULATE(n=RELSIZE()).CALCULATE(page_rank=1.0 / n) + + if n_iters > 0: + source = source.CALCULATE(n_out=n_out_expr, damp_modifier=0.15 / n) # Repeats the following procedure for n_iters iterations to build the next # generation of PageRank values from the current generation. 
@@ -3084,9 +3087,9 @@ def pagerank(n_iters): ) .target_site.PARTITION(name=f"s{i}", by=key) .target_site.CALCULATE( - n, + damp_modifier, n_out, - page_rank=(1.0 - d) / n + page_rank=damp_modifier + d * RELSUM(consider_link * page_rank / n_out, per=f"s{i}"), ) .WHERE(dummy_link) diff --git a/tests/test_sql_refsols/pagerank_a0_sqlite.sql b/tests/test_sql_refsols/pagerank_a0_sqlite.sql index 4fe8f22be..866d2c84f 100644 --- a/tests/test_sql_refsols/pagerank_a0_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a0_sqlite.sql @@ -1,16 +1,12 @@ -WITH _s0 AS ( +WITH _t0 AS ( SELECT - COUNT(*) OVER () AS agg_2, + ROUND(CAST(1.0 AS REAL) / COUNT(*) OVER (), 5) AS page_rank_0, s_key FROM main.sites ) SELECT - MAX(_s0.s_key) AS key, - ROUND(CAST(1.0 AS REAL) / MAX(_s0.agg_2), 5) AS page_rank -FROM _s0 AS _s0 -JOIN main.links AS links - ON _s0.s_key = links.l_source -GROUP BY - _s0.s_key + s_key AS key, + page_rank_0 AS page_rank +FROM _t0 ORDER BY - MAX(_s0.s_key) + s_key diff --git a/tests/test_sql_refsols/pagerank_a1_sqlite.sql b/tests/test_sql_refsols/pagerank_a1_sqlite.sql index d7965056f..c590ae2af 100644 --- a/tests/test_sql_refsols/pagerank_a1_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a1_sqlite.sql @@ -5,6 +5,7 @@ WITH _t7 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t7 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t7 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,11 +29,9 @@ WITH _t7 AS ( _s0.s_key ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t8.l_source <> _t8.l_target 
OR _t8.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t8.l_source <> _t8.l_target OR _t8.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, NOT _t8.l_target IS NULL AND _t8.l_source = _t8.l_target AS dummy_link, diff --git a/tests/test_sql_refsols/pagerank_a2_sqlite.sql b/tests/test_sql_refsols/pagerank_a2_sqlite.sql index 0b90f0fd8..991a214e5 100644 --- a/tests/test_sql_refsols/pagerank_a2_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a2_sqlite.sql @@ -5,6 +5,7 @@ WITH _t9 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t9 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t9 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t9 AS ( _s0.s_key ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t10.l_source <> _t10.l_target OR _t10.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t10.l_source <> _t10.l_target OR _t10.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t10.l_target IS NULL AND _t10.l_source = _t10.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,9 +45,7 @@ WITH _t9 AS ( ON _s5.s_key = _t10.l_target OR _t10.l_target IS NULL ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t11.l_source <> _t11.l_target OR _t11.l_target IS NULL AS INTEGER) * _t4.page_rank_0 ) AS 
REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_a6_sqlite.sql b/tests/test_sql_refsols/pagerank_a6_sqlite.sql index 24a887003..1eb55bf92 100644 --- a/tests/test_sql_refsols/pagerank_a6_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_a6_sqlite.sql @@ -5,6 +5,7 @@ WITH _t17 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t17 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t17 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t17 AS ( _s0.s_key ), _t12 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t17 AS ( ON _s5.s_key = _t18.l_target OR _t18.l_target IS NULL ), _t10 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t12.anything_n - ) + 0.85 * SUM( + _t12.damp_modifier + 0.85 * SUM( CAST(( CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t12.page_rank_0 ) AS REAL) / _t12.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_550, - _t12.anything_n, + _t12.damp_modifier, NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_548, _t12.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t17 AS ( _t12.dummy_link 
), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( + _t10.damp_modifier + 0.85 * SUM( CAST(( CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t10.page_rank_0_550 ) AS REAL) / _t10.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_560, - _t10.anything_n, + _t10.damp_modifier, NOT _t20.l_target IS NULL AND _t20.l_source = _t20.l_target AS dummy_link_558, _t10.n_out, _s13.s_key @@ -86,14 +81,12 @@ WITH _t17 AS ( _t10.dummy_link_548 ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t21.l_source <> _t21.l_target OR _t21.l_target IS NULL AS INTEGER) * _t8.page_rank_0_560 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_570, - _t8.anything_n, + _t8.damp_modifier, NOT _t21.l_target IS NULL AND _t21.l_source = _t21.l_target AS dummy_link_568, _t8.n_out, _s17.s_key @@ -106,14 +99,12 @@ WITH _t17 AS ( _t8.dummy_link_558 ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _t6.page_rank_0_570 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_580, - _t6.anything_n, + _t6.damp_modifier, NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link_578, _t6.n_out, _s21.s_key @@ -126,9 +117,7 @@ WITH _t17 AS ( _t6.dummy_link_568 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t4.page_rank_0_580 ) AS REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_b3_sqlite.sql b/tests/test_sql_refsols/pagerank_b3_sqlite.sql index 40404439e..324265a0b 100644 --- a/tests/test_sql_refsols/pagerank_b3_sqlite.sql +++ 
b/tests/test_sql_refsols/pagerank_b3_sqlite.sql @@ -5,6 +5,7 @@ WITH _t11 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t11 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t11 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t11 AS ( _s0.s_key ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t12.l_source <> _t12.l_target OR _t12.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t12.l_target IS NULL AND _t12.l_source = _t12.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t11 AS ( ON _s5.s_key = _t12.l_target OR _t12.l_target IS NULL ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t13.l_source <> _t13.l_target OR _t13.l_target IS NULL AS INTEGER) * _t6.page_rank_0 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_48, - _t6.anything_n, + _t6.damp_modifier, NOT _t13.l_target IS NULL AND _t13.l_source = _t13.l_target AS dummy_link_46, _t6.n_out, _s9.s_key @@ -66,9 +63,7 @@ WITH _t11 AS ( _t6.dummy_link ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _t4.page_rank_0_48 
) AS REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_c4_sqlite.sql b/tests/test_sql_refsols/pagerank_c4_sqlite.sql index e0c215a16..8dc1fb5dd 100644 --- a/tests/test_sql_refsols/pagerank_c4_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_c4_sqlite.sql @@ -5,6 +5,7 @@ WITH _t13 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t13 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t13 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t13 AS ( _s0.s_key ), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t14.l_source <> _t14.l_target OR _t14.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t14.l_target IS NULL AND _t14.l_source = _t14.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t13 AS ( ON _s5.s_key = _t14.l_target OR _t14.l_target IS NULL ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t15.l_source <> _t15.l_target OR _t15.l_target IS NULL AS INTEGER) * _t8.page_rank_0 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_114, - _t8.anything_n, + _t8.damp_modifier, NOT _t15.l_target IS NULL AND _t15.l_source = _t15.l_target AS dummy_link_112, _t8.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t13 AS ( _t8.dummy_link ), 
_t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _t6.page_rank_0_114 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_124, - _t6.anything_n, + _t6.damp_modifier, NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link_122, _t6.n_out, _s13.s_key @@ -86,9 +81,7 @@ WITH _t13 AS ( _t6.dummy_link_112 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t4.page_rank_0_124 ) AS REAL) / _t4.n_out diff --git a/tests/test_sql_refsols/pagerank_d5_sqlite.sql b/tests/test_sql_refsols/pagerank_d5_sqlite.sql index f6f5cb16e..df1187f10 100644 --- a/tests/test_sql_refsols/pagerank_d5_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_d5_sqlite.sql @@ -5,6 +5,7 @@ WITH _t15 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t15 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t15 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t15 AS ( _s0.s_key ), _t10 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t16.l_source <> _t16.l_target OR _t16.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS 
page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t16.l_target IS NULL AND _t16.l_source = _t16.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t15 AS ( ON _s5.s_key = _t16.l_target OR _t16.l_target IS NULL ), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( + _t10.damp_modifier + 0.85 * SUM( CAST(( CAST(_t17.l_source <> _t17.l_target OR _t17.l_target IS NULL AS INTEGER) * _t10.page_rank_0 ) AS REAL) / _t10.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_256, - _t10.anything_n, + _t10.damp_modifier, NOT _t17.l_target IS NULL AND _t17.l_source = _t17.l_target AS dummy_link_254, _t10.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t15 AS ( _t10.dummy_link ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t18.l_source <> _t18.l_target OR _t18.l_target IS NULL AS INTEGER) * _t8.page_rank_0_256 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_266, - _t8.anything_n, + _t8.damp_modifier, NOT _t18.l_target IS NULL AND _t18.l_source = _t18.l_target AS dummy_link_264, _t8.n_out, _s13.s_key @@ -86,14 +81,12 @@ WITH _t15 AS ( _t8.dummy_link_254 ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t19.l_source <> _t19.l_target OR _t19.l_target IS NULL AS INTEGER) * _t6.page_rank_0_266 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_276, - _t6.anything_n, + _t6.damp_modifier, NOT _t19.l_target IS NULL AND _t19.l_source = _t19.l_target AS dummy_link_274, _t6.n_out, _s17.s_key @@ -106,9 +99,7 @@ WITH _t15 AS ( _t6.dummy_link_264 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t20.l_source <> _t20.l_target OR _t20.l_target IS NULL AS INTEGER) * _t4.page_rank_0_276 ) AS REAL) / _t4.n_out diff --git 
a/tests/test_sql_refsols/pagerank_h8_sqlite.sql b/tests/test_sql_refsols/pagerank_h8_sqlite.sql index 017bc5921..f603efd8d 100644 --- a/tests/test_sql_refsols/pagerank_h8_sqlite.sql +++ b/tests/test_sql_refsols/pagerank_h8_sqlite.sql @@ -5,6 +5,7 @@ WITH _t21 AS ( ), _s0 AS ( SELECT COUNT(*) OVER () AS n, + CAST(1.0 AS REAL) / COUNT(*) OVER () AS page_rank, s_key FROM _t21 ), _s1 AS ( @@ -14,12 +15,12 @@ WITH _t21 AS ( FROM main.links ), _s2 AS ( SELECT + CAST(0.15 AS REAL) / MAX(_s0.n) AS damp_modifier, COALESCE( SUM(IIF(_s1.l_target IS NULL, _s0.n, CAST(_s1.l_source <> _s1.l_target AS INTEGER))), 0 ) AS n_out, - CAST(1.0 AS REAL) / MAX(_s0.n) AS page_rank, - MAX(_s0.n) AS anything_n, + MAX(_s0.page_rank) AS anything_page_rank, MAX(_s0.s_key) AS anything_s_key FROM _s0 AS _s0 JOIN _s1 AS _s1 @@ -28,14 +29,12 @@ WITH _t21 AS ( _s0.s_key ), _t16 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _s2.anything_n - ) + 0.85 * SUM( + _s2.damp_modifier + 0.85 * SUM( CAST(( - CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s2.page_rank + CAST(_t22.l_source <> _t22.l_target OR _t22.l_target IS NULL AS INTEGER) * _s2.anything_page_rank ) AS REAL) / _s2.n_out ) OVER (PARTITION BY _s5.s_key) AS page_rank_0, - _s2.anything_n, + _s2.damp_modifier, NOT _t22.l_target IS NULL AND _t22.l_source = _t22.l_target AS dummy_link, _s2.n_out, _s5.s_key @@ -46,14 +45,12 @@ WITH _t21 AS ( ON _s5.s_key = _t22.l_target OR _t22.l_target IS NULL ), _t14 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t16.anything_n - ) + 0.85 * SUM( + _t16.damp_modifier + 0.85 * SUM( CAST(( CAST(_t23.l_source <> _t23.l_target OR _t23.l_target IS NULL AS INTEGER) * _t16.page_rank_0 ) AS REAL) / _t16.n_out ) OVER (PARTITION BY _s9.s_key) AS page_rank_0_2354, - _t16.anything_n, + _t16.damp_modifier, NOT _t23.l_target IS NULL AND _t23.l_source = _t23.l_target AS dummy_link_2352, _t16.n_out, _s9.s_key @@ -66,14 +63,12 @@ WITH _t21 AS ( _t16.dummy_link ), _t12 AS ( SELECT - ( - 
CAST(0.15000000000000002 AS REAL) / _t14.anything_n - ) + 0.85 * SUM( + _t14.damp_modifier + 0.85 * SUM( CAST(( CAST(_t24.l_source <> _t24.l_target OR _t24.l_target IS NULL AS INTEGER) * _t14.page_rank_0_2354 ) AS REAL) / _t14.n_out ) OVER (PARTITION BY _s13.s_key) AS page_rank_0_2364, - _t14.anything_n, + _t14.damp_modifier, NOT _t24.l_target IS NULL AND _t24.l_source = _t24.l_target AS dummy_link_2362, _t14.n_out, _s13.s_key @@ -86,14 +81,12 @@ WITH _t21 AS ( _t14.dummy_link_2352 ), _t10 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t12.anything_n - ) + 0.85 * SUM( + _t12.damp_modifier + 0.85 * SUM( CAST(( CAST(_t25.l_source <> _t25.l_target OR _t25.l_target IS NULL AS INTEGER) * _t12.page_rank_0_2364 ) AS REAL) / _t12.n_out ) OVER (PARTITION BY _s17.s_key) AS page_rank_0_2374, - _t12.anything_n, + _t12.damp_modifier, NOT _t25.l_target IS NULL AND _t25.l_source = _t25.l_target AS dummy_link_2372, _t12.n_out, _s17.s_key @@ -106,14 +99,12 @@ WITH _t21 AS ( _t12.dummy_link_2362 ), _t8 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t10.anything_n - ) + 0.85 * SUM( + _t10.damp_modifier + 0.85 * SUM( CAST(( CAST(_t26.l_source <> _t26.l_target OR _t26.l_target IS NULL AS INTEGER) * _t10.page_rank_0_2374 ) AS REAL) / _t10.n_out ) OVER (PARTITION BY _s21.s_key) AS page_rank_0_2384, - _t10.anything_n, + _t10.damp_modifier, NOT _t26.l_target IS NULL AND _t26.l_source = _t26.l_target AS dummy_link_2382, _t10.n_out, _s21.s_key @@ -126,14 +117,12 @@ WITH _t21 AS ( _t10.dummy_link_2372 ), _t6 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t8.anything_n - ) + 0.85 * SUM( + _t8.damp_modifier + 0.85 * SUM( CAST(( CAST(_t27.l_source <> _t27.l_target OR _t27.l_target IS NULL AS INTEGER) * _t8.page_rank_0_2384 ) AS REAL) / _t8.n_out ) OVER (PARTITION BY _s25.s_key) AS page_rank_0_2394, - _t8.anything_n, + _t8.damp_modifier, NOT _t27.l_target IS NULL AND _t27.l_source = _t27.l_target AS dummy_link_2392, _t8.n_out, _s25.s_key @@ -146,14 +135,12 @@ WITH _t21 AS 
( _t8.dummy_link_2382 ), _t4 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t6.anything_n - ) + 0.85 * SUM( + _t6.damp_modifier + 0.85 * SUM( CAST(( CAST(_t28.l_source <> _t28.l_target OR _t28.l_target IS NULL AS INTEGER) * _t6.page_rank_0_2394 ) AS REAL) / _t6.n_out ) OVER (PARTITION BY _s29.s_key) AS page_rank_0_2404, - _t6.anything_n, + _t6.damp_modifier, NOT _t28.l_target IS NULL AND _t28.l_source = _t28.l_target AS dummy_link_2402, _t6.n_out, _s29.s_key @@ -166,9 +153,7 @@ WITH _t21 AS ( _t6.dummy_link_2392 ), _t2 AS ( SELECT - ( - CAST(0.15000000000000002 AS REAL) / _t4.anything_n - ) + 0.85 * SUM( + _t4.damp_modifier + 0.85 * SUM( CAST(( CAST(_t29.l_source <> _t29.l_target OR _t29.l_target IS NULL AS INTEGER) * _t4.page_rank_0_2404 ) AS REAL) / _t4.n_out From 0508822866ff0b6c7495af1b5dd7de9297f8ce23 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 19:01:38 -0400 Subject: [PATCH 42/97] Initial revisions --- tests/conftest.py | 126 +------------------ tests/gen_data/gen_pagerank.py | 153 +++++++++++++++++++++++ tests/test_metadata/pagerank_graphs.json | 2 +- tests/testing_utilities.py | 4 +- 4 files changed, 162 insertions(+), 123 deletions(-) create mode 100644 tests/gen_data/gen_pagerank.py diff --git a/tests/conftest.py b/tests/conftest.py index 21b362b2d..99a34ae75 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ from pydough.qdag import AstNodeBuilder from tests.testing_utilities import graph_fetcher +from .gen_data.gen_pagerank import gen_pagerank_records, pagerank_configs from .gen_data.gen_technograph import gen_technograph_records @@ -456,90 +457,9 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: # Setup the directory to be the main PyDough directory. base_dir: str = os.path.dirname(os.path.dirname(__file__)) - # The configurations for the pagerank databases. Each tuple contains: - # - The name of the database. - # - The number of nodes n in the graph. 
- # - The edges in the graph as a list of tuples (src, dst), assuming the - # nodes are numbered from 1 to n. - pagerank_configs = [ - ("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)]), - ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]), - ( - "PAGERANK_C", - 8, - [ - (1, 2), - (1, 6), - (2, 1), - (2, 5), - (2, 6), - (3, 2), - (4, 2), - (4, 5), - (5, 3), - (7, 8), - (8, 7), - ], - ), - ( - "PAGERANK_D", - 16, - [ - (1, 2), - (1, 3), - (1, 4), - (1, 5), - (2, 1), - (2, 5), - (3, 2), - (4, 2), - (4, 5), - (4, 11), - (5, 3), - (5, 11), - (5, 14), - (5, 16), - (6, 7), - (7, 8), - (8, 6), - (8, 7), - (9, 2), - (9, 10), - (11, 12), - (12, 13), - (12, 14), - (13, 4), - (13, 5), - (15, 2), - ], - ), - ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]), - ("PAGERANK_F", 100, []), - ( - "PAGERANK_G", - 1000, - [ - (j + 1, i + 1) - for i in range(1000) - for j in range(i + 1, 1000) - if str(i) in str(j) - ], - ), - ( - "PAGERANK_H", - 50, - [ - (i, j) - for i in range(1, 51) - for j in range(1, 51) - if i != j and (i < j or i % j == 0) - ], - ), - ] - # Setup each of the the pagerank databases using the configurations. result: dict[str, DatabaseContext] = {} - for name, nodes, vertices in pagerank_configs: + for name, nodes, edges in pagerank_configs(): # Create the database and ensure it is empty. subprocess.run( f"cd tests; rm -fv gen_data/{name.lower()}.db; sqlite3 gen_data/{name.lower()}.db < gen_data/init_pagerank.sql", @@ -547,44 +467,10 @@ def sqlite_pagerank_db_contexts() -> dict[str, DatabaseContext]: ) path: str = os.path.join(base_dir, f"tests/gen_data/{name.lower()}.db") connection: sqlite3.Connection = sqlite3.connect(path) - cursor: sqlite3.Cursor = connection.cursor() - - # For every node, insert an entry into the SITES table. 
- for site in range(nodes): - cursor.execute( - "INSERT INTO SITES VALUES (?, ?)", - (site + 1, f"SITE {chr(ord('A') + site)}"), - ) - - # For every edge, insert an entry into the LINKS table. Keep track of - # the nodes that have no outgoing links. - no_outgoing: set[int] = set(range(1, nodes + 1)) - for src, dst in vertices: - no_outgoing.discard(src) - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (src, dst), - ) - - # If there are no outgoing links for a site, insert a NULL link for it, - # indicating that the site links to ALL sites. - for site in no_outgoing: - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site, None), - ) - - # Insert a dummy self-link for every site. - for site in range(1, nodes + 1): - cursor.execute( - "INSERT INTO LINKS VALUES (?, ?)", - (site, site), - ) - - # Commit the changes, close the cursor, and store the context in the - # result dictionary. - cursor.connection.commit() - cursor.close() + + # Fill the tables of the database using the nodes/edges, then store the + # database context in the result. + gen_pagerank_records(connection, nodes, edges) result[name] = DatabaseContext( DatabaseConnection(connection), DatabaseDialect.SQLITE ) diff --git a/tests/gen_data/gen_pagerank.py b/tests/gen_data/gen_pagerank.py new file mode 100644 index 000000000..703ec5c32 --- /dev/null +++ b/tests/gen_data/gen_pagerank.py @@ -0,0 +1,153 @@ +""" +Logic used to generate the various sqlite databases used for PageRank tests. +""" + +import sqlite3 + + +def pagerank_configs() -> list[tuple[str, int, list[tuple[int, int]]]]: + """ + Returns a list of configurations for generating PageRank test data. + Each tuple contains: + - The name of the configuration (should be in the form "PAGERANK_X"). + - The number of vertices in the graph (numbered 1 to n) + - The list of tuples indicating edges in the graph in the form (src, dest). 
+ """ + configs: list[tuple[str, int, list[tuple[int, int]]]] = [] + configs.append(("PAGERANK_A", 4, [(1, 2), (2, 1), (2, 3), (3, 4), (4, 1), (4, 2)])) + configs.append( + ("PAGERANK_B", 5, [(1, 2), (2, 1), (2, 5), (3, 2), (4, 2), (4, 5), (5, 3)]) + ) + configs.append( + ( + "PAGERANK_C", + 8, + [ + (1, 2), + (1, 6), + (2, 1), + (2, 5), + (2, 6), + (3, 2), + (4, 2), + (4, 5), + (5, 3), + (7, 8), + (8, 7), + ], + ) + ) + configs.append( + ( + "PAGERANK_D", + 16, + [ + (1, 2), + (1, 3), + (1, 4), + (1, 5), + (2, 1), + (2, 5), + (3, 2), + (4, 2), + (4, 5), + (4, 11), + (5, 3), + (5, 11), + (5, 14), + (5, 16), + (6, 7), + (7, 8), + (8, 6), + (8, 7), + (9, 2), + (9, 10), + (11, 12), + (12, 13), + (12, 14), + (13, 4), + (13, 5), + (15, 2), + ], + ) + ) + configs.append( + ("PAGERANK_E", 5, [(i, j) for i in range(1, 6) for j in range(1, 6) if i != j]) + ) + configs.append(("PAGERANK_F", 100, [])) + configs.append( + ( + "PAGERANK_G", + 1000, + [ + (j + 1, i + 1) + for i in range(1000) + for j in range(i + 1, 1000) + if str(i) in str(j) + ], + ) + ) + configs.append( + ( + "PAGERANK_H", + 50, + [ + (i, j) + for i in range(1, 51) + for j in range(1, 51) + if i != j and (i < j or i % j == 0) + ], + ) + ) + return configs + + +def gen_pagerank_records( + connection: sqlite3.Connection, nodes: int, edges: list[tuple[int, int]] +) -> None: + """ + Fills a sqlite database with PageRank test data based on the provided + configuration. + + Args: + `connection`: The sqlite3 connection to the database. + `nodes`: The number of nodes in the graph. + `edges`: A list of tuples representing the edges in the graph. + """ + cursor: sqlite3.Cursor = connection.cursor() + + # For every node, insert an entry into the SITES table. + for site in range(nodes): + cursor.execute( + "INSERT INTO SITES VALUES (?, ?)", + (site + 1, f"SITE {hex(site)[2:]:0>4}"), + ) + + # For every edge, insert an entry into the LINKS table. Keep track of + # the nodes that have no outgoing links. 
+ no_outgoing: set[int] = set(range(1, nodes + 1)) + for src, dst in edges: + no_outgoing.discard(src) + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (src, dst), + ) + + # If there are no outgoing links for a site, insert a NULL link for it, + # indicating that the site links to ALL sites. + for site in no_outgoing: + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, None), + ) + + # Insert a dummy self-link for every site. + for site in range(1, nodes + 1): + cursor.execute( + "INSERT INTO LINKS VALUES (?, ?)", + (site, site), + ) + + # Commit the changes & close the cursor + cursor.connection.commit() + cursor.close() diff --git a/tests/test_metadata/pagerank_graphs.json b/tests/test_metadata/pagerank_graphs.json index bd4150ceb..bbbe11275 100644 --- a/tests/test_metadata/pagerank_graphs.json +++ b/tests/test_metadata/pagerank_graphs.json @@ -61,4 +61,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d2c01690a..2b5750cb1 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1127,7 +1127,7 @@ def run_relational_test( """ # Skip if indicated. if self.skip_relational: - pytest.skip(f"Skipping relational plan test for {self.test_name!r}") + pytest.skip(f"Skipping relational plan test for {self.test_name}") # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) @@ -1185,7 +1185,7 @@ def run_sql_test( """ # Skip if indicated. 
if self.skip_sql: - pytest.skip(f"Skipping SQL text test for {self.test_name!r}") + pytest.skip(f"Skipping SQL text test for {self.test_name}") # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) From f59f45739d95f3f1054546ef4b61a9f849a7cb23 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 19:52:38 -0400 Subject: [PATCH 43/97] Added set up for simplification --- pydough/conversion/relational_converter.py | 2 + .../conversion/relational_simplification.py | 192 ++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 pydough/conversion/relational_simplification.py diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index f66debc38..8ae333b21 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -84,6 +84,7 @@ from .hybrid_tree import HybridTree from .merge_projects import merge_projects from .projection_pullup import pullup_projections +from .relational_simplification import simplify_expressions @dataclass @@ -1446,6 +1447,7 @@ def optimize_relational_tree( # Step 8: run projection pullup followed by column pruning 2x. for _ in range(2): root = confirm_root(pullup_projections(root)) + simplify_expressions(root) root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run filter pushdown diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py new file mode 100644 index 000000000..aa88d0089 --- /dev/null +++ b/pydough/conversion/relational_simplification.py @@ -0,0 +1,192 @@ +""" +Logic used to simplify relational expressions in a relational node. 
+""" + +__all__ = ["simplify_expressions"] + + +from enum import Enum + +from pydough.relational import ( + Aggregate, + CallExpression, + ColumnReference, + EmptySingleton, + ExpressionSortInfo, + Filter, + Join, + Limit, + LiteralExpression, + Project, + RelationalExpression, + RelationalNode, + RelationalRoot, + Scan, + WindowCallExpression, +) +from pydough.relational.rel_util import ( + add_input_name, +) + + +class LogicalPredicate(Enum): + """ + Enum representing logical predicates that can be inferred about relational + expressions. + """ + + NOT_NULL = "NOT_NULL" + NOT_NEGATIVE = "NOT_NEGATIVE" + POSITIVE = "POSITIVE" + + +def run_simplification( + expr: RelationalExpression, + input_predicates: dict[RelationalExpression, set[LogicalPredicate]], +) -> tuple[RelationalExpression, set[LogicalPredicate]]: + """ + Runs the simplification on a single expression, applying any predicates + inferred from the input nodes to aid the process and inferring any new + predicates that apply to the resulting expression. + + Args: + `expr`: The expression to simplify. + `input_predicates`: A dictionary mapping input columns to the set of + predicates that are true for the column. + + Returns: + The simplified expression and a set of predicates that apply to the + resulting expression. 
+ """ + new_args: list[RelationalExpression] + new_partitions: list[RelationalExpression] + new_orders: list[ExpressionSortInfo] + arg_predicates: list[set[LogicalPredicate]] + output_predicates: set[LogicalPredicate] = set() + + if isinstance(expr, LiteralExpression): + if expr.value is not None: + output_predicates.add(LogicalPredicate.NOT_NULL) + if isinstance(expr.value, (int, float)): + if expr.value >= 0: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if expr.value > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + + if isinstance(expr, ColumnReference): + output_predicates.update(input_predicates.get(expr, set())) + + if isinstance(expr, CallExpression): + new_args = [] + arg_predicates = [] + for arg in expr.inputs: + new_arg, new_preds = run_simplification(arg, input_predicates) + new_args.append(new_arg) + arg_predicates.append(new_preds) + expr = CallExpression(expr.op, expr.data_type, new_args) + + if isinstance(expr, WindowCallExpression): + new_args = [] + new_partitions = [] + new_orders = [] + arg_predicates = [] + for arg in expr.inputs: + new_arg, new_preds = run_simplification(arg, input_predicates) + new_args.append(new_arg) + arg_predicates.append(new_preds) + for partition in expr.partition_inputs: + new_partition, _ = run_simplification(partition, input_predicates) + new_partitions.append(new_partition) + for order in expr.order_inputs: + new_order, _ = run_simplification(order.expr, input_predicates) + new_orders.append( + ExpressionSortInfo(new_order, order.ascending, order.nulls_first) + ) + expr = WindowCallExpression( + expr.op, + expr.data_type, + new_args, + new_partitions, + new_orders, + expr.kwargs, + ) + return expr, output_predicates + + +def simplify_expressions( + node: RelationalNode, +) -> dict[RelationalExpression, set[LogicalPredicate]]: + """ + The main recursive procedure done to perform expression simplification on + a relational node and its descendants. 
The transformation is done in-place + + Args: + `node`: The relational node to perform simplification on. + + Returns: + The predicates inferred from the output columns of the node. + """ + # Recursively invoke the procedure on all inputs to the node. + input_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + for idx, input_node in enumerate(node.inputs): + input_alias: str | None = node.default_input_aliases[idx] + predicates = simplify_expressions(input_node) + for expr, preds in predicates.items(): + input_predicates[add_input_name(expr, input_alias)] = preds + + # Transform the expressions of the current node in-place. + ref_expr: RelationalExpression + output_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + match node: + case ( + Project() + | Filter() + | Join() + | Limit() + | RelationalRoot() + | Scan() + | EmptySingleton() + ): + for name, expr in node.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.columns[name], output_predicates[ref_expr] = run_simplification( + expr, input_predicates + ) + if isinstance(node, (Filter, Join)): + node._condition = run_simplification(node.condition, input_predicates)[ + 0 + ] + if isinstance(node, (RelationalRoot, Limit)): + node._orderings = [ + ExpressionSortInfo( + run_simplification(order_expr.expr, input_predicates)[0], + order_expr.ascending, + order_expr.nulls_first, + ) + for order_expr in node.orderings + ] + if isinstance(node, RelationalRoot): + node._ordered_columns = [ + (name, node.columns[name]) for name, _ in node.ordered_columns + ] + case Aggregate(): + for name, expr in node.keys.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.keys[name], output_predicates[ref_expr] = run_simplification( + expr, input_predicates + ) + node.columns[name] = node.keys[name] + for name, expr in node.aggregations.items(): + ref_expr = ColumnReference(name, expr.data_type) + new_agg, output_predicates[ref_expr] = run_simplification( + expr, 
input_predicates + ) + assert isinstance(new_agg, CallExpression) + node.aggregations[name] = new_agg + node.columns[name] = node.aggregations[name] + + # For all other nodes, do not perform any simplification. + case _: + pass + + return output_predicates From b34f6ca22729fa06d18cc5961cb8da2a86a08344 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 20:17:25 -0400 Subject: [PATCH 44/97] Added first simplification rules --- .../conversion/relational_simplification.py | 164 +++++++++++++++--- .../agg_simplification_1.txt | 2 +- .../agg_simplification_2.txt | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 2 +- tests/test_plan_refsols/common_prefix_ag.txt | 2 +- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 2 +- tests/test_plan_refsols/common_prefix_aj.txt | 2 +- tests/test_plan_refsols/common_prefix_ak.txt | 2 +- tests/test_plan_refsols/common_prefix_al.txt | 2 +- tests/test_plan_refsols/common_prefix_p.txt | 2 +- tests/test_plan_refsols/common_prefix_x.txt | 2 +- tests/test_plan_refsols/correl_18.txt | 2 +- tests/test_plan_refsols/correl_29.txt | 2 +- .../month_year_sliding_windows.txt | 4 +- .../multi_partition_access_5.txt | 15 +- .../multi_partition_access_6.txt | 4 +- tests/test_plan_refsols/simple_cross_11.txt | 2 +- .../sqlite_udf_count_epsilon.txt | 2 +- ...ograph_battery_failure_rates_anomalies.txt | 2 +- .../technograph_incident_rate_per_brand.txt | 2 +- .../technograph_most_unreliable_products.txt | 2 +- tests/test_plan_refsols/tpch_q20.txt | 2 +- .../agg_simplification_1_ansi.sql | 2 +- .../agg_simplification_1_sqlite.sql | 2 +- .../agg_simplification_2_ansi.sql | 6 +- .../agg_simplification_2_sqlite.sql | 6 +- .../sqlite_udf_count_epsilon_sqlite.sql | 2 +- ...h_battery_failure_rates_anomalies_ansi.sql | 4 +- ...battery_failure_rates_anomalies_sqlite.sql | 4 +- ...chnograph_incident_rate_per_brand_ansi.sql | 2 +- ...nograph_incident_rate_per_brand_sqlite.sql | 2 +- 
...hnograph_most_unreliable_products_ansi.sql | 4 +- ...ograph_most_unreliable_products_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q20_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q20_sqlite.sql | 2 +- 36 files changed, 187 insertions(+), 80 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index aa88d0089..58414afcb 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -7,6 +7,7 @@ from enum import Enum +import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, CallExpression, @@ -15,6 +16,7 @@ ExpressionSortInfo, Filter, Join, + JoinType, Limit, LiteralExpression, Project, @@ -40,9 +42,91 @@ class LogicalPredicate(Enum): POSITIVE = "POSITIVE" +def simplify_function_call( + expr: CallExpression, + arg_predicates: list[set[LogicalPredicate]], + no_group_aggregate: bool, +) -> tuple[RelationalExpression, set[LogicalPredicate]]: + """ + TODO + """ + output_expr: RelationalExpression = expr + output_predicates: set[LogicalPredicate] = set() + match expr.op: + case pydop.COUNT | pydop.NDISTINCT: + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if ( + len(expr.inputs) == 1 + and LogicalPredicate.NOT_NULL in arg_predicates[0] + and no_group_aggregate + ): + output_predicates.add(LogicalPredicate.POSITIVE) + case ( + pydop.SUM + | pydop.AVG + | pydop.MIN + | pydop.MAX + | pydop.ANYTHING + | pydop.MEDIAN + | pydop.QUANTILE + ): + for predicate in [ + LogicalPredicate.NOT_NULL, + LogicalPredicate.NOT_NEGATIVE, + LogicalPredicate.POSITIVE, + ]: + if predicate in arg_predicates[0]: + output_predicates.add(predicate) + case pydop.DEFAULT_TO: + if LogicalPredicate.NOT_NULL in arg_predicates[0]: + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + else: + if any(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): + 
output_predicates.add(LogicalPredicate.NOT_NULL) + for pred in arg_predicates[0]: + if all(pred in preds for preds in arg_predicates): + output_predicates.add(pred) + return output_expr, output_predicates + + +def simplify_window_call( + expr: WindowCallExpression, + arg_predicates: list[set[LogicalPredicate]], +) -> tuple[RelationalExpression, set[LogicalPredicate]]: + """ + TODO + """ + output_predicates: set[LogicalPredicate] = set() + return expr, output_predicates + + +def infer_literal_predicates(expr: LiteralExpression) -> set[LogicalPredicate]: + """ + Infers logical predicates from a literal expression. + + Args: + `expr`: The literal expression to infer predicates from. + + Returns: + A set of logical predicates inferred from the literal. + """ + output_predicates: set[LogicalPredicate] = set() + if expr.value is not None: + output_predicates.add(LogicalPredicate.NOT_NULL) + if isinstance(expr.value, (int, float)): + if expr.value >= 0: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if expr.value > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + return output_predicates + + def run_simplification( expr: RelationalExpression, input_predicates: dict[RelationalExpression, set[LogicalPredicate]], + no_group_aggregate: bool, ) -> tuple[RelationalExpression, set[LogicalPredicate]]: """ Runs the simplification on a single expression, applying any predicates @@ -53,6 +137,9 @@ def run_simplification( `expr`: The expression to simplify. `input_predicates`: A dictionary mapping input columns to the set of predicates that are true for the column. + `no_group_aggregate`: A boolean indicating whether the expression is + part of an aggregate operation w/o keys, which affects how predicates + are inferred. 
Returns: The simplified expression and a set of predicates that apply to the @@ -63,27 +150,29 @@ def run_simplification( new_orders: list[ExpressionSortInfo] arg_predicates: list[set[LogicalPredicate]] output_predicates: set[LogicalPredicate] = set() + requires_rewrite: bool = False if isinstance(expr, LiteralExpression): - if expr.value is not None: - output_predicates.add(LogicalPredicate.NOT_NULL) - if isinstance(expr.value, (int, float)): - if expr.value >= 0: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if expr.value > 0: - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates = infer_literal_predicates(expr) if isinstance(expr, ColumnReference): - output_predicates.update(input_predicates.get(expr, set())) + output_predicates = input_predicates.get(expr, set()) if isinstance(expr, CallExpression): new_args = [] arg_predicates = [] for arg in expr.inputs: - new_arg, new_preds = run_simplification(arg, input_predicates) + new_arg, new_preds = run_simplification( + arg, input_predicates, no_group_aggregate + ) + requires_rewrite |= new_arg is not arg new_args.append(new_arg) arg_predicates.append(new_preds) - expr = CallExpression(expr.op, expr.data_type, new_args) + if requires_rewrite: + expr = CallExpression(expr.op, expr.data_type, new_args) + expr, output_predicates = simplify_function_call( + expr, arg_predicates, no_group_aggregate + ) if isinstance(expr, WindowCallExpression): new_args = [] @@ -91,25 +180,37 @@ def run_simplification( new_orders = [] arg_predicates = [] for arg in expr.inputs: - new_arg, new_preds = run_simplification(arg, input_predicates) + new_arg, new_preds = run_simplification( + arg, input_predicates, no_group_aggregate + ) + requires_rewrite |= new_arg is not arg new_args.append(new_arg) arg_predicates.append(new_preds) for partition in expr.partition_inputs: - new_partition, _ = run_simplification(partition, input_predicates) + new_partition, _ = run_simplification( + partition, input_predicates, 
no_group_aggregate + ) + requires_rewrite |= new_partition is not partition new_partitions.append(new_partition) for order in expr.order_inputs: - new_order, _ = run_simplification(order.expr, input_predicates) + new_order, _ = run_simplification( + order.expr, input_predicates, no_group_aggregate + ) + requires_rewrite |= new_order is not order.expr new_orders.append( ExpressionSortInfo(new_order, order.ascending, order.nulls_first) ) - expr = WindowCallExpression( - expr.op, - expr.data_type, - new_args, - new_partitions, - new_orders, - expr.kwargs, - ) + if requires_rewrite: + expr = WindowCallExpression( + expr.op, + expr.data_type, + new_args, + new_partitions, + new_orders, + expr.kwargs, + ) + expr, output_predicates = simplify_window_call(expr, arg_predicates) + return expr, output_predicates @@ -150,16 +251,16 @@ def simplify_expressions( for name, expr in node.columns.items(): ref_expr = ColumnReference(name, expr.data_type) node.columns[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates + expr, input_predicates, False ) if isinstance(node, (Filter, Join)): - node._condition = run_simplification(node.condition, input_predicates)[ - 0 - ] + node._condition = run_simplification( + node.condition, input_predicates, False + )[0] if isinstance(node, (RelationalRoot, Limit)): node._orderings = [ ExpressionSortInfo( - run_simplification(order_expr.expr, input_predicates)[0], + run_simplification(order_expr.expr, input_predicates, False)[0], order_expr.ascending, order_expr.nulls_first, ) @@ -169,17 +270,24 @@ def simplify_expressions( node._ordered_columns = [ (name, node.columns[name]) for name, _ in node.ordered_columns ] + if isinstance(node, Join) and node.join_type != JoinType.INNER: + for expr, preds in output_predicates.items(): + if ( + isinstance(expr, ColumnReference) + and expr.input_name != node.default_input_aliases[0] + ): + preds.discard(LogicalPredicate.NOT_NULL) case Aggregate(): for name, expr in 
node.keys.items(): ref_expr = ColumnReference(name, expr.data_type) node.keys[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates + expr, input_predicates, False ) node.columns[name] = node.keys[name] for name, expr in node.aggregations.items(): ref_expr = ColumnReference(name, expr.data_type) new_agg, output_predicates[ref_expr] = run_simplification( - expr, input_predicates + expr, input_predicates, len(node.keys) == 0 ) assert isinstance(new_agg, CallExpression) node.aggregations[name] = new_agg diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt index 014c1dbdf..a17af4c0d 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('aug_exchange', aug_exchange), ('su1', DEFAULT_TO(count_one, 0:numeric)), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', DEFAULT_TO(0:numeric, 0:numeric)), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 
2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', 0:numeric), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 
2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/agg_simplification_2.txt b/tests/test_plan_refsols/agg_simplification_2.txt index c1121462b..9a985234e 100644 --- a/tests/test_plan_refsols/agg_simplification_2.txt +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('a3', DEFAULT_TO(sum_nj, 0:numeric)), ('a4', DEFAULT_TO(DEFAULT_TO(sum_sz, 0:numeric), 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', min_anys), ('a9', min_anys)], orderings=[(sbCustState):asc_first]) +ROOT(columns=[('state', sbCustState), ('a1', n_rows), ('a2', sum_n_rows), ('a3', sum_nj), ('a4', DEFAULT_TO(sum_sz, 0:numeric)), ('a5', min_min_sbCustPhone), ('a6', max_max_sbCustPhone), ('a7', min_anys), ('a8', min_anys), ('a9', min_anys)], orderings=[(sbCustState):asc_first]) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_max_sbCustPhone': MAX(sbCustPhone), 'min_anys': ANYTHING(LOWER(sbCustState)), 'min_min_sbCustPhone': MIN(sbCustPhone), 'n_rows': NDISTINCT(sbCustCity), 'sum_n_rows': COUNT(), 'sum_nj': 
COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'sum_sz': SUM(INTEGER(sbCustPostalCode))}) SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 7e7f2a981..8e8ffd889 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(n_rows, 0:numeric)), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index e57b541ed..0f4659d66 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 
2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index a2d4305ca..f84051d87 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index f28e37a55..19cf86f66 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 4df2fe150..c7443fe73 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 036dab140..f580316b2 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', DEFAULT_TO(n_rows, 0:numeric)), 
('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(anything_n_name):asc_first]) +ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index d5af6cd67..05d4bb7bd 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': 
t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index f3d26328c..47e4f9c62 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_parts_ordered', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_distinct_parts', DEFAULT_TO(ndistinct_l_partkey, 0:numeric))], orderings=[(DEFAULT_TO(ndistinct_l_partkey, 0:numeric) / DEFAULT_TO(n_rows_1, 0:numeric)):asc_first, (c_name):asc_first], limit=5:numeric) +ROOT(columns=[('name', c_name), ('n_orders', n_rows), ('n_parts_ordered', n_rows_1), ('n_distinct_parts', ndistinct_l_partkey)], orderings=[(ndistinct_l_partkey / n_rows_1):asc_first, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 9de4be686..c34cd7ff5 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) +ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 
'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 646b9855d..e3dbcd52b 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('n', DEFAULT_TO(sum_n_above_avg, 0:numeric))], orderings=[]) +ROOT(columns=[('n', sum_n_above_avg)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) FILTER(condition=o_totalprice >= 0.5:numeric * DEFAULT_TO(sum_o_totalprice, 0:numeric), columns={}) JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate, type=INNER, cardinality=PLURAL_FILTER, columns={'o_totalprice': t1.o_totalprice, 'sum_o_totalprice': t0.sum_o_totalprice}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index f3b0080b7..2beba08e0 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', DEFAULT_TO(n_rows, 0:numeric)), ('n_above_avg_suppliers', DEFAULT_TO(n_rows_1, 0:numeric)), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) +ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': 
t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) JOIN(condition=t0.anything_n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_nationkey': t0.anything_n_nationkey, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_nationkey': anything_n_nationkey, 'anything_n_regionkey': anything_n_regionkey, 'n_rows': n_rows}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 41ba7be61..25f33c177 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,8 +1,8 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) - FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': 
sum_month_total_spent, 'year': year}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 6c6ea66d7..9fad186da 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -2,18 +2,17 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 
'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.sum_n_ticker_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - 
SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 69f9b27a4..3576df074 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -2,7 +2,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=n_ticker_type_trans == 1:numeric | n_cust_type_trans == 1:numeric, columns={'sbTxId': sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t0.n_cust_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - 
FILTER(condition=DEFAULT_TO(sum_n_cust_type_trans, 0:numeric) > 1:numeric, columns={'sbTxCustId': sbTxCustId}) + FILTER(condition=sum_n_cust_type_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) @@ -43,7 +43,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) + FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/simple_cross_11.txt b/tests/test_plan_refsols/simple_cross_11.txt index a307e9426..ebbe3bf16 100644 --- a/tests/test_plan_refsols/simple_cross_11.txt +++ b/tests/test_plan_refsols/simple_cross_11.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('n', DEFAULT_TO(n, 0:numeric))], orderings=[]) +ROOT(columns=[('n', n)], orderings=[]) JOIN(condition=t0.o_orderdate == 
t1.min_date, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'n': t0.n}) AGGREGATE(keys={'o_orderdate': o_orderdate}, aggregations={'n': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index 5ed26b322..380033384 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', r_name), ('n_cust', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) +ROOT(columns=[('name', r_name), ('n_cust', n_rows)], orderings=[(r_name):asc_first]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 050999da6..639a18e28 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) +ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) 
AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index a64d4f50c..954d1b5a9 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) +ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index d27e1aa5b..53528b9d0 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 
2:numeric))], orderings=[(ROUND(DEFAULT_TO(DEFAULT_TO(sum_n_incidents, 0:numeric), 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) +ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 245e3ef0e..0ce79583c 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -6,7 +6,7 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(DEFAULT_TO(sum_l_quantity, 0:numeric), 0:numeric), columns={'ps_suppkey': ps_suppkey}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric), columns={'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) 
JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql b/tests/test_sql_refsols/agg_simplification_1_ansi.sql index 6c346edc0..8f1e739b8 100644 --- a/tests/test_sql_refsols/agg_simplification_1_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -1,6 +1,6 @@ SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, - COALESCE(COUNT(*), 0) AS su1, + COUNT(*) AS su1, COALESCE(COUNT(*) * 2, 0) AS su2, COALESCE(COUNT(*) * -1, 0) AS su3, COALESCE(COUNT(*) * -3, 0) AS su4, diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index 4716a0c4c..ab162413c 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -153,7 +153,7 @@ WITH _t1 AS ( ) SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, - COALESCE(COUNT(*), 0) AS su1, + COUNT(*) AS su1, COALESCE(COUNT(*) * 2, 0) AS su2, COALESCE(COUNT(*) * -1, 0) AS su3, COALESCE(COUNT(*) * -3, 0) AS su4, diff --git a/tests/test_sql_refsols/agg_simplification_2_ansi.sql b/tests/test_sql_refsols/agg_simplification_2_ansi.sql index d36f0482c..fef0f4709 100644 --- a/tests/test_sql_refsols/agg_simplification_2_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_2_ansi.sql @@ -1,9 +1,9 @@ SELECT sbcuststate AS state, COUNT(DISTINCT sbcustcity) AS a1, - COALESCE(COUNT(*), 0) AS a2, - COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, - COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS BIGINT)), 0), 0) AS a4, + COUNT(*) AS a2, + COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END) AS a3, + COALESCE(SUM(CAST(sbcustpostalcode AS 
BIGINT)), 0) AS a4, MIN(sbcustphone) AS a5, MAX(sbcustphone) AS a6, ANY_VALUE(LOWER(sbcuststate)) AS a7, diff --git a/tests/test_sql_refsols/agg_simplification_2_sqlite.sql b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql index deee0c7a6..22bc341e6 100644 --- a/tests/test_sql_refsols/agg_simplification_2_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_2_sqlite.sql @@ -1,9 +1,9 @@ SELECT sbcuststate AS state, COUNT(DISTINCT sbcustcity) AS a1, - COALESCE(COUNT(*), 0) AS a2, - COALESCE(COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END), 0) AS a3, - COALESCE(COALESCE(SUM(CAST(sbcustpostalcode AS INTEGER)), 0), 0) AS a4, + COUNT(*) AS a2, + COUNT(CASE WHEN LOWER(sbcustname) LIKE 'j%' THEN sbcustname ELSE NULL END) AS a3, + COALESCE(SUM(CAST(sbcustpostalcode AS INTEGER)), 0) AS a4, MIN(sbcustphone) AS a5, MAX(sbcustphone) AS a6, MAX(LOWER(sbcuststate)) AS a7, diff --git a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql index 28cbbc248..8d39a6781 100644 --- a/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql +++ b/tests/test_sql_refsols/sqlite_udf_count_epsilon_sqlite.sql @@ -18,7 +18,7 @@ WITH _t2 AS ( ) SELECT region.r_name AS name, - COALESCE(_s3.n_rows, 0) AS n_cust + _s3.n_rows AS n_cust FROM tpch.region AS region JOIN _s3 AS _s3 ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql index e665da566..1af11b549 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_ansi.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(SUM(_s7.n_rows), 0) / 
COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) / COUNT(*), 2) DESC, + ROUND(COALESCE(SUM(_s7.n_rows), 0) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql index 8c6123891..e7749a8a2 100644 --- a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_sqlite.sql @@ -11,7 +11,7 @@ WITH _s7 AS ( SELECT countries.co_name AS country_name, products.pr_name AS product_name, - ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(SUM(_s7.n_rows), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.countries AS countries JOIN main.devices AS devices ON countries.co_id = devices.de_production_country_id @@ -23,7 +23,7 @@ GROUP BY countries.co_name, products.pr_name ORDER BY - ROUND(CAST(COALESCE(COALESCE(SUM(_s7.n_rows), 0), 0) AS REAL) / COUNT(*), 2) DESC, + ROUND(CAST(COALESCE(SUM(_s7.n_rows), 0) AS REAL) / COUNT(*), 2) DESC, products.pr_name, countries.co_name LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql index ad09ee111..595401f9d 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_ansi.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS brand, - ROUND(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) / COUNT(*), 2) AS ir + ROUND(COALESCE(SUM(_s3.n_rows), 0) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON 
devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql index 864caeee4..6b499d7d4 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_sqlite.sql @@ -8,7 +8,7 @@ WITH _s3 AS ( ) SELECT products.pr_brand AS brand, - ROUND(CAST(COALESCE(COALESCE(SUM(_s3.n_rows), 0), 0) AS REAL) / COUNT(*), 2) AS ir + ROUND(CAST(COALESCE(SUM(_s3.n_rows), 0) AS REAL) / COUNT(*), 2) AS ir FROM main.devices AS devices JOIN main.products AS products ON devices.de_product_id = products.pr_id diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql index 572bff6d9..56095322c 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_ansi.sql @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, products.pr_type AS product_type, - ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 2) AS ir + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) / _s5.n_rows, 2) DESC + ROUND(COALESCE(_s5.sum_n_incidents, 0) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql index 4f0d45267..41de9fe64 100644 --- a/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_sqlite.sql @@ -22,10 +22,10 @@ SELECT products.pr_name AS product, products.pr_brand AS product_brand, 
products.pr_type AS product_type, - ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) AS ir + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) AS ir FROM main.products AS products JOIN _s5 AS _s5 ON _s5.de_product_id = products.pr_id ORDER BY - ROUND(CAST(COALESCE(COALESCE(_s5.sum_n_incidents, 0), 0) AS REAL) / _s5.n_rows, 2) DESC + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS REAL) / _s5.n_rows, 2) DESC LIMIT 5 diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 67f82c489..0c77ea8b5 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) + 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) GROUP BY partsupp.ps_suppkey diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index c7f3110d7..4ffe16370 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -24,7 +24,7 @@ WITH _s3 AS ( JOIN _s5 AS _s5 ON _s5.p_partkey = partsupp.ps_partkey AND partsupp.ps_availqty > ( - 0.5 * COALESCE(COALESCE(_s5.sum_l_quantity, 0), 0) + 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) GROUP BY partsupp.ps_suppkey From 7884341155d3ece95a5d08c82d1a78f1bfa72ce5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 20:20:32 -0400 Subject: [PATCH 45/97] Improved null handling for aggregations --- .../conversion/relational_simplification.py | 8 +++-- .../multi_partition_access_5.txt | 33 +++++++++---------- .../multi_partition_access_6.txt | 2 +- tests/test_plan_refsols/simple_cross_3.txt | 2 +- tests/test_plan_refsols/triple_partition.txt | 2 +- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/pydough/conversion/relational_simplification.py 
b/pydough/conversion/relational_simplification.py index 58414afcb..510d3675e 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -59,7 +59,7 @@ def simplify_function_call( if ( len(expr.inputs) == 1 and LogicalPredicate.NOT_NULL in arg_predicates[0] - and no_group_aggregate + and not no_group_aggregate ): output_predicates.add(LogicalPredicate.POSITIVE) case ( @@ -72,12 +72,16 @@ def simplify_function_call( | pydop.QUANTILE ): for predicate in [ - LogicalPredicate.NOT_NULL, LogicalPredicate.NOT_NEGATIVE, LogicalPredicate.POSITIVE, ]: if predicate in arg_predicates[0]: output_predicates.add(predicate) + if ( + LogicalPredicate.NOT_NULL in arg_predicates[0] + and not no_group_aggregate + ): + output_predicates.add(LogicalPredicate.NOT_NULL) case pydop.DEFAULT_TO: if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = expr.inputs[0] diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 9fad186da..0c443c6e8 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': n_ticker_trans, 
'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.sum_n_ticker_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) 
+ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', sum_n_ticker_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.sum_n_ticker_type_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / sum_n_ticker_type_trans > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + 
JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 3576df074..8dedc5205 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -35,7 +35,7 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - FILTER(condition=DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 1:numeric, columns={'sbTxType': sbTxType}) + FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) 
JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) diff --git a/tests/test_plan_refsols/simple_cross_3.txt b/tests/test_plan_refsols/simple_cross_3.txt index 9c08e86d4..9ce8291db 100644 --- a/tests/test_plan_refsols/simple_cross_3.txt +++ b/tests/test_plan_refsols/simple_cross_3.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('supplier_nation', anything_anything_supplier_nation), ('customer_nation', anything_anything_n_name), ('nation_combinations', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) +ROOT(columns=[('supplier_nation', anything_anything_supplier_nation), ('customer_nation', anything_anything_n_name), ('nation_combinations', sum_n_rows)], orderings=[]) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'anything_anything_supplier_nation': ANYTHING(anything_supplier_nation), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_UNKNOWN, columns={'anything_n_name': t0.anything_n_name, 'anything_supplier_nation': t0.anything_supplier_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'r_regionkey': t0.r_regionkey}) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'l_suppkey': l_suppkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_supplier_nation': ANYTHING(supplier_nation), 'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index a5ed33abe..7b3963130 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -1,5 +1,5 @@ 
ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[(supp_region):asc_first]) - AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / DEFAULT_TO(sum_n_instances, 0:numeric))}) + AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / sum_n_instances)}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) From b281c8a621a2a6ee037d764398ea26362b4aba67 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 20:37:52 -0400 Subject: [PATCH 46/97] Added more simplification rules --- .../conversion/relational_simplification.py | 86 +++++++++++++++++++ .../agg_simplification_1.txt | 2 +- .../agg_simplification_1_ansi.sql | 8 +- .../agg_simplification_1_sqlite.sql | 8 +- 4 files changed, 95 insertions(+), 9 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 510d3675e..45698b619 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -42,6 +42,55 @@ class LogicalPredicate(Enum): POSITIVE = "POSITIVE" +NULL_PROPAGATING_OPS: set[pydop.PyDoughOperator] = { + pydop.ADD, + pydop.SUB, + pydop.MUL, + pydop.BAN, + pydop.BOR, + pydop.NOT, + pydop.LOWER, + pydop.UPPER, + pydop.LENGTH, + pydop.STRIP, + pydop.REPLACE, + pydop.FIND, + pydop.ABS, + pydop.CEIL, + pydop.FLOOR, + pydop.ROUND, + pydop.EQU, + pydop.NEQ, + pydop.GEQ, + pydop.GRT, + pydop.LET, + pydop.LEQ, + pydop.BXR, + pydop.STARTSWITH, + pydop.ENDSWITH, + 
pydop.CONTAINS, + pydop.LIKE, + pydop.SIGN, + pydop.SMALLEST, + pydop.LARGEST, + pydop.IFF, + pydop.YEAR, + pydop.MONTH, + pydop.DAY, + pydop.HOUR, + pydop.MINUTE, + pydop.SECOND, + pydop.DATEDIFF, + pydop.DAYNAME, + pydop.DAYOFWEEK, + pydop.SLICE, + pydop.LPAD, + pydop.RPAD, + pydop.MONOTONIC, + pydop.JOIN_STRINGS, +} + + def simplify_function_call( expr: CallExpression, arg_predicates: list[set[LogicalPredicate]], @@ -52,6 +101,9 @@ def simplify_function_call( """ output_expr: RelationalExpression = expr output_predicates: set[LogicalPredicate] = set() + if expr.op in NULL_PROPAGATING_OPS: + if all(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): + output_predicates.add(LogicalPredicate.NOT_NULL) match expr.op: case pydop.COUNT | pydop.NDISTINCT: output_predicates.add(LogicalPredicate.NOT_NULL) @@ -92,6 +144,40 @@ def simplify_function_call( for pred in arg_predicates[0]: if all(pred in preds for preds in arg_predicates): output_predicates.add(pred) + case pydop.ABS: + if ( + LogicalPredicate.POSITIVE in arg_predicates[0] + or LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] + ): + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + else: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case ( + pydop.LENGTH + | pydop.BAN + | pydop.BOR + | pydop.BXR + | pydop.EQU + | pydop.NEQ + | pydop.GEQ + | pydop.GRT + | pydop.LET + | pydop.LEQ + | pydop.STARTSWITH + | pydop.ENDSWITH + | pydop.CONTAINS + | pydop.LIKE + | pydop.SQRT + ): + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.PRESENT: + if LogicalPredicate.NOT_NULL in arg_predicates[0]: + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.ABSENT: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) return output_expr, output_predicates diff --git a/tests/test_plan_refsols/agg_simplification_1.txt 
b/tests/test_plan_refsols/agg_simplification_1.txt index a17af4c0d..feb52edbe 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', 0:numeric), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', 
agg_63)], orderings=[(aug_exchange):asc_first]) +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', count_one * 2:numeric), ('su3', count_one * -1:numeric), ('su4', count_one * -3:numeric), ('su5', 0:numeric), ('su6', count_one * 0.5:numeric), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange 
!= 'NYSE Arca':string))}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql b/tests/test_sql_refsols/agg_simplification_1_ansi.sql index 8f1e739b8..1807f6d2e 100644 --- a/tests/test_sql_refsols/agg_simplification_1_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -1,11 +1,11 @@ SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, COUNT(*) AS su1, - COALESCE(COUNT(*) * 2, 0) AS su2, - COALESCE(COUNT(*) * -1, 0) AS su3, - COALESCE(COUNT(*) * -3, 0) AS su4, + COUNT(*) * 2 AS su2, + COUNT(*) * -1 AS su3, + COUNT(*) * -3 AS su4, 0 AS su5, - COALESCE(COUNT(*) * 0.5, 0) AS su6, + COUNT(*) * 0.5 AS su6, COALESCE(NULL, 0) AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index ab162413c..515f11664 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -154,11 +154,11 @@ WITH _t1 AS ( SELECT LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, COUNT(*) AS su1, - COALESCE(COUNT(*) * 2, 0) AS su2, - COALESCE(COUNT(*) * -1, 0) AS su3, - COALESCE(COUNT(*) * -3, 0) AS su4, + COUNT(*) * 2 AS su2, + COUNT(*) * -1 AS su3, + COUNT(*) * -3 AS su4, 0 AS su5, - COALESCE(COUNT(*) * 0.5, 0) AS su6, + COUNT(*) * 0.5 AS su6, COALESCE(NULL, 0) AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), From 78f6e78f63e2b8cc705514e030c453ce89c2779f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 18 Jul 2025 21:12:07 -0400 Subject: [PATCH 47/97] More >0 filter improvements --- .../conversion/relational_simplification.py | 62 ++++++++++++++++--- 
tests/test_plan_refsols/common_prefix_al.txt | 17 +++-- ...ch_overlapping_event_searches_per_user.txt | 23 ++++--- tests/test_plan_refsols/tpch_q20.txt | 21 +++---- ...erlapping_event_searches_per_user_ansi.sql | 9 +-- ...lapping_event_searches_per_user_sqlite.sql | 9 +-- tests/test_sql_refsols/tpch_q20_ansi.sql | 11 ++-- tests/test_sql_refsols/tpch_q20_sqlite.sql | 11 ++-- 8 files changed, 95 insertions(+), 68 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 45698b619..cceec7bf4 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -108,10 +108,8 @@ def simplify_function_call( case pydop.COUNT | pydop.NDISTINCT: output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if ( - len(expr.inputs) == 1 - and LogicalPredicate.NOT_NULL in arg_predicates[0] - and not no_group_aggregate + if not no_group_aggregate and ( + len(expr.inputs) == 0 or LogicalPredicate.NOT_NULL in arg_predicates[0] ): output_predicates.add(LogicalPredicate.POSITIVE) case ( @@ -158,19 +156,36 @@ def simplify_function_call( | pydop.BAN | pydop.BOR | pydop.BXR - | pydop.EQU - | pydop.NEQ - | pydop.GEQ - | pydop.GRT - | pydop.LET - | pydop.LEQ | pydop.STARTSWITH | pydop.ENDSWITH | pydop.CONTAINS | pydop.LIKE | pydop.SQRT + | pydop.MONOTONIC ): output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: + match (expr.op, expr.inputs[1]): + case (pydop.GRT, LiteralExpression()) if ( + expr.inputs[1].value == 0 + and LogicalPredicate.POSITIVE in arg_predicates[0] + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.add(LogicalPredicate.POSITIVE) + case (pydop.GEQ, LiteralExpression()) if ( + expr.inputs[1].value 
== 0 + and LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.add(LogicalPredicate.POSITIVE) + case _: + pass + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.PRESENT: if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = LiteralExpression(True, expr.data_type) @@ -189,6 +204,32 @@ def simplify_window_call( TODO """ output_predicates: set[LogicalPredicate] = set() + no_frame: bool = not ( + expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs + ) + match expr.op: + case pydop.RANKING | pydop.PERCENTILE: + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.add(LogicalPredicate.POSITIVE) + case pydop.RELSUM | pydop.RELAVG: + if LogicalPredicate.NOT_NULL in arg_predicates[0] and no_frame: + output_predicates.add(LogicalPredicate.NOT_NULL) + if LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if LogicalPredicate.POSITIVE in arg_predicates[0] and no_frame: + output_predicates.add(LogicalPredicate.POSITIVE) + case pydop.RELSIZE: + if no_frame: + output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.RELCOUNT: + if no_frame: + output_predicates.add(LogicalPredicate.NOT_NULL) + if LogicalPredicate.NOT_NULL in arg_predicates[0]: + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) return expr, output_predicates @@ -367,6 +408,7 @@ def simplify_expressions( and expr.input_name != node.default_input_aliases[0] ): preds.discard(LogicalPredicate.NOT_NULL) + preds.discard(LogicalPredicate.POSITIVE) case Aggregate(): for name, expr in node.keys.items(): ref_expr = 
ColumnReference(name, expr.data_type) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index 05d4bb7bd..b00157d0e 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -15,12 +15,11 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discou SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) - FILTER(condition=n_rows > 0:numeric, columns={'o_custkey': o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) - FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) + FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) + FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index 91e424686..dbc3fc089 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -1,14 +1,13 @@ ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searches)], orderings=[(n_searches):desc_last, (anything_anything_user_name):asc_first], limit=4:numeric) AGGREGATE(keys={'anything_user_id': anything_user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) - FILTER(condition=n_rows > 0:numeric, columns={'anything_user_id': anything_user_id, 'anything_user_name': anything_user_name}) - AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 'anything_user_name': ANYTHING(user_name), 'n_rows': COUNT()}) - FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - 
JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) - SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=EVENTS, columns={'ev_name': ev_name}) - SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) - SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_user_id': ANYTHING(user_id), 'anything_user_name': ANYTHING(user_name)}) + FILTER(condition=name_9 != user_name, columns={'search_id': search_id, 'user_id': user_id, 'user_name': user_name}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.user_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + 
SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) + SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=EVENTS, columns={'ev_name': ev_name}) + SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) + SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 0ce79583c..c5975d83f 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -4,14 +4,13 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=n_rows > 0:numeric, columns={'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric), columns={'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) - FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_partkey': l_partkey}, 
aggregations={'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) + FILTER(condition=ps_availqty > 0.5:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric), columns={'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql index 2ec30bec6..335d943d0 100644 --- a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql +++ b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_ansi.sql @@ -3,11 +3,10 @@ WITH _s0 AS ( user_id, user_name FROM users -), _t2 AS ( +), _t1 AS ( SELECT ANY_VALUE(_s0.user_id) AS anything_user_id, - ANY_VALUE(_s0.user_name) AS anything_user_name, - COUNT(*) AS n_rows + 
ANY_VALUE(_s0.user_name) AS anything_user_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.user_id = searches.search_user_id @@ -24,9 +23,7 @@ WITH _s0 AS ( SELECT ANY_VALUE(anything_user_name) AS user_name, COUNT(*) AS n_searches -FROM _t2 -WHERE - n_rows > 0 +FROM _t1 GROUP BY anything_user_id ORDER BY diff --git a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql index a31a1ac1d..1f1901bc0 100644 --- a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql +++ b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_sqlite.sql @@ -3,11 +3,10 @@ WITH _s0 AS ( user_id, user_name FROM users -), _t2 AS ( +), _t1 AS ( SELECT MAX(_s0.user_id) AS anything_user_id, - MAX(_s0.user_name) AS anything_user_name, - COUNT(*) AS n_rows + MAX(_s0.user_name) AS anything_user_name FROM _s0 AS _s0 JOIN searches AS searches ON _s0.user_id = searches.search_user_id @@ -28,9 +27,7 @@ WITH _s0 AS ( SELECT MAX(anything_user_name) AS user_name, COUNT(*) AS n_searches -FROM _t2 -WHERE - n_rows > 0 +FROM _t1 GROUP BY anything_user_id ORDER BY diff --git a/tests/test_sql_refsols/tpch_q20_ansi.sql b/tests/test_sql_refsols/tpch_q20_ansi.sql index 0c77ea8b5..7b8888ca6 100644 --- a/tests/test_sql_refsols/tpch_q20_ansi.sql +++ b/tests/test_sql_refsols/tpch_q20_ansi.sql @@ -16,9 +16,8 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _t2 AS ( - SELECT - COUNT(*) AS n_rows, +), _s7 AS ( + SELECT DISTINCT partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 @@ -26,8 +25,6 @@ WITH _s3 AS ( AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - GROUP BY - partsupp.ps_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -35,8 +32,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t2 AS _t2 - ON 
_t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey +JOIN _s7 AS _s7 + ON _s7.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q20_sqlite.sql b/tests/test_sql_refsols/tpch_q20_sqlite.sql index 4ffe16370..cb3029133 100644 --- a/tests/test_sql_refsols/tpch_q20_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q20_sqlite.sql @@ -16,9 +16,8 @@ WITH _s3 AS ( ON _s3.l_partkey = part.p_partkey WHERE part.p_name LIKE 'forest%' -), _t2 AS ( - SELECT - COUNT(*) AS n_rows, +), _s7 AS ( + SELECT DISTINCT partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN _s5 AS _s5 @@ -26,8 +25,6 @@ WITH _s3 AS ( AND partsupp.ps_availqty > ( 0.5 * COALESCE(_s5.sum_l_quantity, 0) ) - GROUP BY - partsupp.ps_suppkey ) SELECT supplier.s_name AS S_NAME, @@ -35,8 +32,8 @@ SELECT FROM tpch.supplier AS supplier JOIN tpch.nation AS nation ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey -JOIN _t2 AS _t2 - ON _t2.n_rows > 0 AND _t2.ps_suppkey = supplier.s_suppkey +JOIN _s7 AS _s7 + ON _s7.ps_suppkey = supplier.s_suppkey ORDER BY s_name LIMIT 10 From e8a54b8da728eef812244f511bc964932b5816ab Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 19 Jul 2025 01:17:36 -0400 Subject: [PATCH 48/97] Added IFF and KEEP_IF rules --- .../conversion/relational_simplification.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index cceec7bf4..d807853a6 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -193,6 +193,38 @@ def simplify_function_call( output_predicates.add(LogicalPredicate.NOT_NEGATIVE) case pydop.ABSENT: output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.IFF: + if isinstance(expr.inputs[0], LiteralExpression): + if bool(expr.inputs[0].value): + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + 
output_expr = expr.inputs[2] + output_predicates = arg_predicates[2] + elif ( + LogicalPredicate.POSITIVE in arg_predicates[0] + and LogicalPredicate.NOT_NULL in arg_predicates[0] + ): + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + output_predicates = arg_predicates[1] & arg_predicates[2] + case pydop.KEEP_IF: + if isinstance(expr.inputs[1], LiteralExpression): + if bool(expr.inputs[1].value): + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + else: + output_expr = LiteralExpression(None, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + elif ( + LogicalPredicate.POSITIVE in arg_predicates[1] + and LogicalPredicate.NOT_NULL in arg_predicates[1] + ): + output_expr = expr.inputs[0] + output_predicates = arg_predicates[0] + elif LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) return output_expr, output_predicates From b8a7f77ad2a4cbd396805ee9ed2202228b89ef5f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 11:00:48 -0400 Subject: [PATCH 49/97] Added more simplification patterns and tests --- .../conversion/relational_simplification.py | 24 +++++++- tests/test_pipeline_defog_custom.py | 61 +++++++++++++++++++ tests/test_pipeline_pagerank.py | 28 ++++----- tests/test_plan_refsols/simplification_1.txt | 3 + tests/testing_utilities.py | 41 ++++++++----- 5 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 tests/test_plan_refsols/simplification_1.txt diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index d807853a6..70e8ef858 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -108,10 +108,19 @@ def simplify_function_call( case pydop.COUNT | pydop.NDISTINCT: output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if not 
no_group_aggregate and ( - len(expr.inputs) == 0 or LogicalPredicate.NOT_NULL in arg_predicates[0] + if not no_group_aggregate: + if ( + len(expr.inputs) == 0 + or LogicalPredicate.NOT_NULL in arg_predicates[0] + ): + output_predicates.add(LogicalPredicate.POSITIVE) + elif ( + expr.op == pydop.COUNT + and len(expr.inputs) == 1 + and LogicalPredicate.NOT_NULL in arg_predicates[0] ): output_predicates.add(LogicalPredicate.POSITIVE) + output_expr = CallExpression(pydop.COUNT, expr.data_type, []) case ( pydop.SUM | pydop.AVG @@ -132,6 +141,17 @@ def simplify_function_call( and not no_group_aggregate ): output_predicates.add(LogicalPredicate.NOT_NULL) + case pydop.ADD | pydop.MUL | pydop.DIV: + for predicate in [LogicalPredicate.NOT_NEGATIVE, LogicalPredicate.POSITIVE]: + if all(predicate in preds for preds in arg_predicates): + output_predicates.add(predicate) + if expr.op == pydop.DIV: + if ( + LogicalPredicate.NOT_NULL in arg_predicates[0] + and LogicalPredicate.NOT_NULL in arg_predicates[1] + and LogicalPredicate.POSITIVE in arg_predicates[1] + ): + output_predicates.add(LogicalPredicate.NOT_NULL) case pydop.DEFAULT_TO: if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = expr.inputs[0] diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index c127a57e4..4e45fa868 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1587,6 +1587,67 @@ def get_day_of_week( ), id="get_part_single", ), + pytest.param( + PyDoughPandasTest( + "result = Broker.CALCULATE(" + " s00 = ABS(13)," # -> 13 + " s01 = ABS(0)," # -> 0 + " s02 = ABS(COUNT(customers))," # -> COUNT(customers) + " s03 = ABS(COUNT(customers) + 5)," # -> COUNT(customers) + 5 + " s04 = ABS(COUNT(customers) * 2)," # -> COUNT(customers) * 2 + " s05 = ABS(COUNT(customers) / 8.0)," # -> COUNT(customers) / 8.0 + " s06 = DEFAULT_TO(10, 0)," # -> 10 + " s07 = DEFAULT_TO(COUNT(customers), 0)," # -> COUNT(customers) + " s08 = 
DEFAULT_TO(ABS(COUNT(customers) - 25), 0)," # -> ABS(COUNT(customers) - 25) + " s09 = DEFAULT_TO(COUNT(customers) + 1, 0)," # -> COUNT(customers) + 1 + " s10 = DEFAULT_TO(COUNT(customers) - 3, 0)," # -> COUNT(customers) - 3 + " s11 = DEFAULT_TO(COUNT(customers) * -1, 0)," # -> COUNT(customers) * -1 + " s12 = DEFAULT_TO(COUNT(customers) / 2.5, 0)," # -> COUNT(customers) / 2.5 + " s13 = DEFAULT_TO(COUNT(customers) > 10, False)," # -> COUNT(customers) > 10 + " s14 = DEFAULT_TO(COUNT(customers) >= 10, False)," # -> COUNT(customers) >= 10 + " s15 = DEFAULT_TO(COUNT(customers) == 20, False)," # -> COUNT(customers) == 20 + " s16 = DEFAULT_TO(COUNT(customers) != 25, False)," # -> COUNT(customers) != 25 + " s17 = DEFAULT_TO(COUNT(customers) < 25, False)," # -> COUNT(customers) < 25 + " s18 = DEFAULT_TO(COUNT(customers) <= 25, False)," # -> COUNT(customers) <= 25 + " s19 = COUNT(DEFAULT_TO(customers.name, ''))," # -> COUNT(customers) + " s20 = ABS(DEFAULT_TO(AVG(ABS(DEFAULT_TO(LENGTH(customers.name), 0))), 0))," # -> AVG(DEFAULT_TO(LENGTH(customers.name), 0)) + " s21 = PRESENT(COUNT(customers))," # -> True + " s22 = PRESENT(1) >= 0," # -> True + " s23 = ABSENT(1) >= 0," # -> True + ")", + "Broker", + lambda: pd.DataFrame( + { + "s00": [13], + "s01": [0], + "s02": [20], + "s03": [25], + "s04": [40], + "s05": [2.5], + "s06": [10], + "s07": [20], + "s08": [5], + "s09": [21], + "s10": [17], + "s11": [-20], + "s12": [8.0], + "s13": [1], + "s14": [1], + "s15": [1], + "s16": [1], + "s17": [1], + "s18": [1], + "s19": [20], + "s20": [12.3], + "s21": [1], + "s22": [1], + "s23": [1], + } + ), + "simplification_1", + ), + id="simplification_1", + ), ], ) def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_pipeline_pagerank.py b/tests/test_pipeline_pagerank.py index 8d37255a6..b5e90efd7 100644 --- a/tests/test_pipeline_pagerank.py +++ b/tests/test_pipeline_pagerank.py @@ -28,7 +28,7 @@ ), "pagerank_a0", order_sensitive=True, - args=[0], + 
kwargs={"n_iters": 0}, ), id="pagerank_a0", ), @@ -44,7 +44,7 @@ ), "pagerank_a1", order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_a1", ), @@ -60,7 +60,7 @@ ), "pagerank_a2", order_sensitive=True, - args=[2], + kwargs={"n_iters": 2}, ), id="pagerank_a2", ), @@ -76,7 +76,7 @@ ), "pagerank_a6", order_sensitive=True, - args=[6], + kwargs={"n_iters": 6}, ), id="pagerank_a6", ), @@ -94,7 +94,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[0], + kwargs={"n_iters": 0}, ), id="pagerank_b0", ), @@ -112,7 +112,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_b1", ), @@ -128,7 +128,7 @@ ), "pagerank_b3", order_sensitive=True, - args=[3], + kwargs={"n_iters": 3}, ), id="pagerank_b3", ), @@ -153,7 +153,7 @@ ), "pagerank_c4", order_sensitive=True, - args=[4], + kwargs={"n_iters": 4}, ), id="pagerank_c4", ), @@ -188,7 +188,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_d1", ), @@ -221,7 +221,7 @@ ), "pagerank_d5", order_sensitive=True, - args=[5], + kwargs={"n_iters": 5}, ), id="pagerank_d5", ), @@ -239,7 +239,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[1], + kwargs={"n_iters": 1}, ), id="pagerank_e1", ), @@ -257,7 +257,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[2], + kwargs={"n_iters": 2}, ), id="pagerank_f2", ), @@ -283,7 +283,7 @@ skip_relational=True, skip_sql=True, order_sensitive=True, - args=[5], + kwargs={"n_iters": 5}, ), id="pagerank_g5", ), @@ -350,7 +350,7 @@ ), "pagerank_h8", order_sensitive=True, - args=[8], + kwargs={"n_iters": 8}, ), id="pagerank_h8", ), diff --git a/tests/test_plan_refsols/simplification_1.txt b/tests/test_plan_refsols/simplification_1.txt new file mode 100644 index 000000000..9a342e6c4 --- /dev/null +++ b/tests/test_plan_refsols/simplification_1.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('s00', 13:numeric), 
('s01', 0:numeric), ('s02', n_rows), ('s03', n_rows + 5:numeric), ('s04', n_rows * 2:numeric), ('s05', n_rows / 8.0:numeric), ('s06', 10:numeric), ('s07', n_rows), ('s08', ABS(n_rows - 25:numeric)), ('s09', n_rows + 1:numeric), ('s10', n_rows - 3:numeric), ('s11', n_rows * -1:numeric), ('s12', n_rows / 2.5:numeric), ('s13', n_rows > 10:numeric), ('s14', n_rows >= 10:numeric), ('s15', n_rows == 20:numeric), ('s16', n_rows != 25:numeric), ('s17', n_rows < 25:numeric), ('s18', n_rows <= 25:numeric), ('s19', n_rows), ('s20', DEFAULT_TO(avg_expr_4, 0:numeric)), ('s21', True:bool), ('s22', True:bool), ('s23', True:bool)], orderings=[]) + AGGREGATE(keys={}, aggregations={'avg_expr_4': AVG(DEFAULT_TO(LENGTH(sbCustName), 0:numeric)), 'n_rows': COUNT()}) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 2b5750cb1..c747cc044 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -904,25 +904,33 @@ def make_relational_ordering( def transform_and_exec_pydough( - pydough_impl: Callable[..., UnqualifiedNode], + pydough_impl: Callable[..., UnqualifiedNode] | str, graph: GraphMetadata, - args: list[Any] | None, + kwargs: dict | None, ) -> UnqualifiedNode: """ Obtains the unqualified node from a PyDough function by invoking the - decorator to transform it, then calling the transformed function. + decorator to transform it (or evaluating the string if provided), then + calling the transformed function. Args: - `pydough_impl`: The PyDough function to be transformed and executed. + `pydough_impl`: The PyDough function to be transformed and executed, + or the string containing the PyDough code to be executed. `graph`: The metadata being used. - `args`: The arguments to pass to the PyDough function, if any. + `kwargs`: The keyword arguments to pass to the PyDough function, if + any. Returns: The unqualified node created by running the transformed version of `pydough_impl`. 
""" - args = args if args is not None else [] - return init_pydough_context(graph)(pydough_impl)(*args) + kwargs = kwargs if kwargs is not None else {} + if isinstance(pydough_impl, str): + # If the pydough_impl is a string, parse it with pydough.from_string. + return pydough.from_string(pydough_impl, metadata=graph, environment=kwargs) + else: + # Otherwise, transform the function with the decorator and call it. + return init_pydough_context(graph)(pydough_impl)(**kwargs) @dataclass @@ -1030,7 +1038,7 @@ class PyDoughPandasTest: a function that returns a Pandas DataFrame. The dataclass contains the following fields: - `pydough_function`: the function that returns the PyDough code evaluated - by the unit test. + by the unit test, or a string representing the PyDough code. - `graph_name`: the name of the graph that the PyDough code will use. - `pd_function`: the function that returns the Pandas DataFrame that should be used as the reference solution. @@ -1049,9 +1057,10 @@ class PyDoughPandasTest: testing. Default is False. """ - pydough_function: Callable[..., UnqualifiedNode] + pydough_function: Callable[..., UnqualifiedNode] | str """ - Function that returns the PyDough code evaluated by the unit test. + Function that returns the PyDough code evaluated by the unit test, or a + string representing the PyDough code. """ graph_name: str @@ -1088,10 +1097,10 @@ class PyDoughPandasTest: same column names as in the reference solution. """ - args: list[Any] | None = None + kwargs: dict | None = None """ - Any additional arguments to pass to the PyDough function when - executing it. If None, no additional arguments are passed. + Any additional keyword arguments to pass to the PyDough function when + executing it. If None, no additional keyword arguments are passed. 
""" skip_relational: bool = False @@ -1132,7 +1141,7 @@ def run_relational_test( # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( - self.pydough_function, graph, self.args + self.pydough_function, graph, self.kwargs ) # Run the PyDough code through the pipeline up until it is converted to @@ -1190,7 +1199,7 @@ def run_sql_test( # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( - self.pydough_function, graph, self.args + self.pydough_function, graph, self.kwargs ) # Convert the PyDough code to SQL text @@ -1235,7 +1244,7 @@ def run_e2e_test( # Obtain the graph and the unqualified node graph: GraphMetadata = fetcher(self.graph_name) root: UnqualifiedNode = transform_and_exec_pydough( - self.pydough_function, graph, self.args + self.pydough_function, graph, self.kwargs ) # Obtain the DataFrame result from the PyDough code From bc5f383d52efaf51ae48d5658b8d2b8b07393f65 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 11:15:55 -0400 Subject: [PATCH 50/97] Minor refactoring --- tests/test_pipeline_defog_custom.py | 51 ++++++++++++++++++- tests/test_pydough_to_sql.py | 18 ------- .../simplification_1_ansi.sql | 26 ++++++++++ .../simplification_1_sqlite.sql | 26 ++++++++++ 4 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 tests/test_sql_refsols/simplification_1_ansi.sql create mode 100644 tests/test_sql_refsols/simplification_1_sqlite.sql diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index 4e45fa868..9f0a587c0 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -11,7 +11,7 @@ from pydough import init_pydough_context, to_df, to_sql from pydough.configs import DayOfWeek, PyDoughConfigs -from pydough.database_connectors import DatabaseContext +from 
pydough.database_connectors import DatabaseContext, DatabaseDialect from pydough.metadata import GraphMetadata from pydough.unqualified import ( UnqualifiedNode, @@ -145,6 +145,7 @@ def get_day_of_week( {"symbol": ["AAPL", "AMZN", "BRK.B", "FB", "GOOG"]} ), "multi_partition_access_1", + skip_sql=True, ), id="multi_partition_access_1", ), @@ -176,6 +177,7 @@ def get_day_of_week( } ), "multi_partition_access_2", + skip_sql=True, ), id="multi_partition_access_2", ), @@ -212,6 +214,7 @@ def get_day_of_week( } ), "multi_partition_access_3", + skip_sql=True, ), id="multi_partition_access_3", ), @@ -228,6 +231,7 @@ def get_day_of_week( } ), "multi_partition_access_4", + skip_sql=True, ), id="multi_partition_access_4", ), @@ -267,6 +271,7 @@ def get_day_of_week( } ), "multi_partition_access_5", + skip_sql=True, ), id="multi_partition_access_5", ), @@ -300,6 +305,7 @@ def get_day_of_week( } ), "multi_partition_access_6", + skip_sql=True, ), id="multi_partition_access_6", ), @@ -492,6 +498,7 @@ def get_day_of_week( } ), "cumulative_stock_analysis", + skip_sql=True, ), id="cumulative_stock_analysis", ), @@ -516,6 +523,7 @@ def get_day_of_week( } ), "time_threshold_reached", + skip_sql=True, ), id="time_threshold_reached", ), @@ -547,6 +555,7 @@ def get_day_of_week( } ), "hour_minute_day", + skip_sql=True, ), id="hour_minute_day", ), @@ -595,6 +604,7 @@ def get_day_of_week( } ), "exponentiation", + skip_sql=True, ), id="exponentiation", ), @@ -747,6 +757,7 @@ def get_day_of_week( } ), "years_months_days_hours_datediff", + skip_sql=True, ), id="years_months_days_hours_datediff", ), @@ -856,6 +867,7 @@ def get_day_of_week( } ), "minutes_seconds_datediff", + skip_sql=True, ), id="minutes_seconds_datediff", ), @@ -907,6 +919,7 @@ def get_day_of_week( ), ), "padding_functions", + skip_sql=True, ), id="padding_functions", ), @@ -985,6 +998,7 @@ def get_day_of_week( wo_step9=lambda x: x["name"].str[2:2], ), "step_slicing", + skip_sql=True, ), id="step_slicing", ), @@ -1004,6 +1018,7 
@@ def get_day_of_week( sign_high_zero=0, ), "sign", + skip_sql=True, ), id="sign", ), @@ -1026,6 +1041,7 @@ def get_day_of_week( } ), "find", + skip_sql=True, ), id="find", ), @@ -1045,6 +1061,7 @@ def get_day_of_week( } ), "strip", + skip_sql=True, ), id="strip", ), @@ -1074,6 +1091,7 @@ def get_day_of_week( } ), "replace", + skip_sql=True, ), id="replace", ), @@ -1104,6 +1122,7 @@ def get_day_of_week( } ), "str_count", + skip_sql=True, ), id="str_count", ), @@ -1135,6 +1154,7 @@ def get_day_of_week( } ), "get_part_multiple", + skip_sql=True, ), id="get_part_multiple", ), @@ -1408,6 +1428,7 @@ def get_day_of_week( } ), "week_offset", + skip_sql=True, ), id="week_offset", ), @@ -1431,6 +1452,7 @@ def get_day_of_week( } ), "window_sliding_frame_relsize", + skip_sql=True, ), id="window_sliding_frame_relsize", ), @@ -1454,6 +1476,7 @@ def get_day_of_week( } ), "window_sliding_frame_relsum", + skip_sql=True, ), id="window_sliding_frame_relsum", ), @@ -1659,7 +1682,7 @@ def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: return request.param -def test_pipeline_until_relational_defog( +def test_pipeline_until_relational_defog_custom( defog_custom_pipeline_test_data: PyDoughPandasTest, defog_graphs: graph_fetcher, get_plan_test_filename: Callable[[str], str], @@ -1676,6 +1699,30 @@ def test_pipeline_until_relational_defog( ) +def test_pipeline_until_sql_defog_custom( + defog_custom_pipeline_test_data: PyDoughPandasTest, + defog_graphs: graph_fetcher, + empty_context_database: DatabaseContext, + defog_config: PyDoughConfigs, + get_sql_test_filename: Callable[[str, DatabaseDialect], str], + update_tests: bool, +): + """ + Tests that the PyDough queries from `defog_custom_pipeline_test_data` + generate correct SQL text. 
+ """ + file_path: str = get_sql_test_filename( + defog_custom_pipeline_test_data.test_name, empty_context_database.dialect + ) + defog_custom_pipeline_test_data.run_sql_test( + defog_graphs, + file_path, + update_tests, + empty_context_database, + config=defog_config, + ) + + @pytest.mark.execute def test_pipeline_e2e_defog_custom( defog_custom_pipeline_test_data: PyDoughPandasTest, diff --git a/tests/test_pydough_to_sql.py b/tests/test_pydough_to_sql.py index ab3116296..493087db9 100644 --- a/tests/test_pydough_to_sql.py +++ b/tests/test_pydough_to_sql.py @@ -26,8 +26,6 @@ window_functions, ) from tests.test_pydough_functions.simple_pydough_functions import ( - agg_simplification_1, - agg_simplification_2, cumulative_stock_analysis, datediff, datetime_sampler, @@ -35,7 +33,6 @@ floor_and_ceil, floor_and_ceil_2, get_part_multiple, - get_part_single, global_acctbal_breakdown, hour_minute_day, nation_acctbal_breakdown, @@ -247,18 +244,6 @@ def test_pydough_to_sql_tpch( "Broker", id="week_offset", ), - pytest.param( - agg_simplification_1, - "agg_simplification_1", - "Broker", - id="agg_simplification_1", - ), - pytest.param( - agg_simplification_2, - "agg_simplification_2", - "Broker", - id="agg_simplification_2", - ), pytest.param( cumulative_stock_analysis, "cumulative_stock_analysis", @@ -283,9 +268,6 @@ def test_pydough_to_sql_tpch( "Broker", id="window_sliding_frame_relsum", ), - pytest.param( - get_part_single, "get_part_single", "Broker", id="get_part_single" - ), pytest.param( get_part_multiple, "get_part_multiple", "Broker", id="get_part_multiple" ), diff --git a/tests/test_sql_refsols/simplification_1_ansi.sql b/tests/test_sql_refsols/simplification_1_ansi.sql new file mode 100644 index 000000000..a07916fd6 --- /dev/null +++ b/tests/test_sql_refsols/simplification_1_ansi.sql @@ -0,0 +1,26 @@ +SELECT + 13 AS s00, + 0 AS s01, + COUNT(*) AS s02, + COUNT(*) + 5 AS s03, + COUNT(*) * 2 AS s04, + COUNT(*) / 8.0 AS s05, + 10 AS s06, + COUNT(*) AS s07, + 
ABS(COUNT(*) - 25) AS s08, + COUNT(*) + 1 AS s09, + COUNT(*) - 3 AS s10, + COUNT(*) * -1 AS s11, + COUNT(*) / 2.5 AS s12, + COUNT(*) > 10 AS s13, + COUNT(*) >= 10 AS s14, + COUNT(*) = 20 AS s15, + COUNT(*) <> 25 AS s16, + COUNT(*) < 25 AS s17, + COUNT(*) <= 25 AS s18, + COUNT(*) AS s19, + COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + TRUE AS s21, + TRUE AS s22, + TRUE AS s23 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_1_sqlite.sql b/tests/test_sql_refsols/simplification_1_sqlite.sql new file mode 100644 index 000000000..39f87c5f9 --- /dev/null +++ b/tests/test_sql_refsols/simplification_1_sqlite.sql @@ -0,0 +1,26 @@ +SELECT + 13 AS s00, + 0 AS s01, + COUNT(*) AS s02, + COUNT(*) + 5 AS s03, + COUNT(*) * 2 AS s04, + CAST(COUNT(*) AS REAL) / 8.0 AS s05, + 10 AS s06, + COUNT(*) AS s07, + ABS(COUNT(*) - 25) AS s08, + COUNT(*) + 1 AS s09, + COUNT(*) - 3 AS s10, + COUNT(*) * -1 AS s11, + CAST(COUNT(*) AS REAL) / 2.5 AS s12, + COUNT(*) > 10 AS s13, + COUNT(*) >= 10 AS s14, + COUNT(*) = 20 AS s15, + COUNT(*) <> 25 AS s16, + COUNT(*) < 25 AS s17, + COUNT(*) <= 25 AS s18, + COUNT(*) AS s19, + COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + TRUE AS s21, + TRUE AS s22, + TRUE AS s23 +FROM main.sbcustomer From a828aa99d858308ec8f0d344fe170746c6e8ba38 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 11:33:18 -0400 Subject: [PATCH 51/97] Fixing double-TPCH error handling --- pydough/unqualified/qualification.py | 3 ++- tests/test_qualification_errors.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 8179436c0..9c74c30b5 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -580,6 +580,7 @@ def qualify_access( if ( isinstance(qualified_parent, GlobalContext) and name == qualified_parent.graph.name + and not is_child ) or ( isinstance(qualified_parent, 
ChildOperatorChildAccess) and isinstance(qualified_parent.child_access, GlobalContext) @@ -1007,7 +1008,7 @@ def qualify_partition( unqualified_parent, None ) qualified_parent: PyDoughCollectionQDAG = self.qualify_collection( - unqualified_parent, context, True, is_cross + unqualified_parent, context, False, is_cross ) qualified_child: PyDoughCollectionQDAG = self.qualify_collection( unqualified_child, qualified_parent, True, is_cross diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index 8446904a0..e40cb94c0 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -180,6 +180,11 @@ "Expected 2 arguments, received 1", id="bad_str_count_few_args", ), + pytest.param( + "result = TPCH.CALCULATE(x=COUNT(TPCH.nations) / COUNT(TPCH.regions))", + "Unrecognized term of TPCH: 'TPCH'. Did you mean: lines, parts, orders, nations, regions?", + id="double_graph", + ), ], ) def test_qualify_error( From 2914f9b4774adabf298300117b8444f17e0d25ee Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 12:22:54 -0400 Subject: [PATCH 52/97] overhauling some of the function call creation and error handling --- .../expression_function_operators.py | 4 +- .../pydough_operators/operator_registry.py | 24 +- pydough/unqualified/qualification.py | 18 +- pydough/unqualified/unqualified_node.py | 233 +++++++++--------- .../window_filter_order_8.txt | 2 +- .../window_filter_order_9.txt | 2 +- tests/test_relational.py | 4 +- tests/test_relational_expressions.py | 4 +- tests/test_unqualified_node.py | 3 +- 9 files changed, 139 insertions(+), 155 deletions(-) diff --git a/pydough/pydough_operators/expression_operators/expression_function_operators.py b/pydough/pydough_operators/expression_operators/expression_function_operators.py index f7fa76fa7..e5b3eaef2 100644 --- a/pydough/pydough_operators/expression_operators/expression_function_operators.py +++ 
b/pydough/pydough_operators/expression_operators/expression_function_operators.py @@ -33,7 +33,7 @@ def __init__( @property def key(self) -> str: - return f"FUNCTION-{self.function_name}" + return self.function_name @property def is_aggregation(self) -> bool: @@ -45,7 +45,7 @@ def function_name(self) -> str: @property def standalone_string(self) -> str: - return f"Function[{self.function_name}]" + return self.function_name def requires_enclosing_parens(self, parent) -> bool: return False diff --git a/pydough/pydough_operators/operator_registry.py b/pydough/pydough_operators/operator_registry.py index 2042bcaf7..dd88b02bc 100644 --- a/pydough/pydough_operators/operator_registry.py +++ b/pydough/pydough_operators/operator_registry.py @@ -11,7 +11,6 @@ from .base_operator import PyDoughOperator from .expression_operators import ( ExpressionFunctionOperator, - KeywordBranchingExpressionFunctionOperator, ) from .expression_operators import registered_expression_operators as REP @@ -58,27 +57,6 @@ def get_operator_by_name(name: str, **kwargs) -> ExpressionFunctionOperator: # Find the operator directly using inspect for op_name, obj in inspect.getmembers(REP): if op_name == name and op_name in REP.__all__ and obj.public: - operator = obj - break + return obj else: raise PyDoughUnqualifiedException(f"Operator {name} not found.") - - # Check if this is a keyword branching operator - if isinstance(operator, KeywordBranchingExpressionFunctionOperator): - # Find the matching implementation based on kwargs - impl: ExpressionFunctionOperator | None = operator.find_matching_implementation( - kwargs - ) - if impl is None: - kwarg_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items()) - raise PyDoughUnqualifiedException( - f"No matching implementation found for {name}({kwarg_str})." - ) - return impl - elif len(kwargs) > 0: - raise PyDoughUnqualifiedException( - f"PyDough function call {name} does not support " - "keyword arguments at this time." 
- ) - - return operator diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 9c74c30b5..fa269ad9d 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -8,12 +8,11 @@ from collections.abc import Iterable import pydough +import pydough.pydough_operators as pydop from pydough.configs import PyDoughConfigs from pydough.errors import PyDoughUnqualifiedException from pydough.metadata import GeneralJoinMetadata, GraphMetadata -from pydough.pydough_operators import get_operator_by_name from pydough.pydough_operators.expression_operators import ( - BinOp, ExpressionFunctionOperator, ExpressionWindowOperator, ) @@ -195,10 +194,7 @@ def qualify_binary_operation( goes wrong during the qualification process, e.g. a term cannot be qualified or is not recognized. """ - # Iterate across all the values of the BinOp enum to figure out which - # one correctly matches the BinOp specified by the operator. - operation: str = BinOp.from_string(unqualified._parcel[0]).name - operator = get_operator_by_name(operation) + operator: pydop.BinaryOperator = unqualified._parcel[0] # Independently qualify the LHS and RHS arguments unqualified_lhs: UnqualifiedNode = unqualified._parcel[1] unqualified_rhs: UnqualifiedNode = unqualified._parcel[2] @@ -439,7 +435,6 @@ def qualify_join_condition( The PyDough QDAG object for the qualified expression node for `condition`. """ - operation: str | None = None raw_term: PyDoughQDAG term: PyDoughExpressionQDAG term_name: str @@ -452,8 +447,7 @@ def qualify_join_condition( # qualification of binary operators except with using # `qualify_join_condition` on the inputs instead of # `qualify_expression`. 
- operation = BinOp.from_string(condition._parcel[0]).name - operator = get_operator_by_name(operation) + binop: pydop.BinaryOperator = condition._parcel[0] qualified_lhs: PyDoughExpressionQDAG = self.qualify_join_condition( condition._parcel[1], access, self_name, other_name ) @@ -461,14 +455,14 @@ def qualify_join_condition( condition._parcel[2], access, self_name, other_name ) return self.builder.build_expression_function_call( - operator, [qualified_lhs, qualified_rhs] + binop, [qualified_lhs, qualified_rhs] ) case UnqualifiedOperation(): # For function calls, invoke the same logic as for normal # qualification of function calls except with using # `qualify_join_condition` on the inputs instead of # `qualify_expression`. - operator = condition._parcel[0] + operator: pydop.PyDoughExpressionOperator = condition._parcel[0] unqualified_operands: list[UnqualifiedNode] = condition._parcel[1] qualified_operands: list[PyDoughQDAG] = [] for node in unqualified_operands: @@ -1172,7 +1166,7 @@ def qualify_best( kwargs: dict[str, object] = {"by": by, "allow_ties": allow_ties} if per: kwargs["per"] = per - rank: UnqualifiedNode = UnqualifiedOperator("RANKING")(**kwargs) + rank: UnqualifiedNode = UnqualifiedOperator(pydop.RANKING)(**kwargs) unqualified_cond: UnqualifiedNode = ( (rank == n_best) if n_best == 1 else (rank <= n_best) ) diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 2be3d852d..0825d9f05 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -30,7 +30,6 @@ from pydough.errors import PyDoughUnqualifiedException from pydough.errors.error_utils import is_bool, is_integer, is_positive_int, is_string from pydough.metadata import GraphMetadata -from pydough.pydough_operators import get_operator_by_name from pydough.types import ( ArrayType, BooleanType, @@ -125,8 +124,7 @@ def __getitem__(self, key): "PyDough objects are currently not supported to be used as indices in 
Python slices." ) args.append(coerced_elem) - operator = get_operator_by_name("SLICE") - return UnqualifiedOperation(operator, args) + return UnqualifiedOperation(pydop.SLICE, args) else: raise PyDoughUnqualifiedException( f"Cannot index into PyDough object {self} with {key!r}" @@ -144,99 +142,99 @@ def __bool__(self): def __add__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("+", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.ADD, self, other_unqualified) def __radd__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("+", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.ADD, other_unqualified, self) def __sub__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("-", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.SUB, self, other_unqualified) def __rsub__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("-", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.SUB, other_unqualified, self) def __mul__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("*", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.MUL, self, other_unqualified) def __rmul__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("*", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.MUL, other_unqualified, self) def __truediv__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("/", self, other_unqualified) + return 
UnqualifiedBinaryOperation(pydop.DIV, self, other_unqualified) def __rtruediv__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("/", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.DIV, other_unqualified, self) def __pow__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("**", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.POW, self, other_unqualified) def __rpow__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("**", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.POW, other_unqualified, self) def __mod__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("%", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.MOD, self, other_unqualified) def __rmod__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("%", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.MOD, other_unqualified, self) def __eq__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("==", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.EQU, self, other_unqualified) def __ne__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("!=", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.NEQ, self, other_unqualified) def __lt__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("<", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.LET, self, 
other_unqualified) def __le__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("<=", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.LEQ, self, other_unqualified) def __gt__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation(">", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.GRT, self, other_unqualified) def __ge__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation(">=", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.GEQ, self, other_unqualified) def __and__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("&", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.BAN, self, other_unqualified) def __rand__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("&", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.BAN, other_unqualified, self) def __or__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("|", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.BOR, self, other_unqualified) def __ror__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("|", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.BOR, other_unqualified, self) def __xor__(self, other: object): other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("^", self, other_unqualified) + return UnqualifiedBinaryOperation(pydop.BXR, self, other_unqualified) def __rxor__(self, other: object): 
other_unqualified: UnqualifiedNode = self.coerce_to_unqualified(other) - return UnqualifiedBinaryOperation("^", other_unqualified, self) + return UnqualifiedBinaryOperation(pydop.BXR, other_unqualified, self) def __pos__(self): return self @@ -245,8 +243,7 @@ def __neg__(self): return 0 - self def __invert__(self): - operator = get_operator_by_name("NOT") - return UnqualifiedOperation(operator, [self]) + return UnqualifiedOperation(pydop.NOT, [self]) def CALCULATE(self, *args, **kwargs: dict[str, object]): calc_args: list[tuple[str, UnqualifiedNode]] = [] @@ -268,15 +265,13 @@ def CALCULATE(self, *args, **kwargs: dict[str, object]): return UnqualifiedCalculate(self, calc_args) def __abs__(self): - operator = get_operator_by_name("ABS") - return UnqualifiedOperation(operator, [self]) + return UnqualifiedOperation(pydop.ABS, [self]) def __round__(self, n=None): if n is None: n = 0 n_unqualified = self.coerce_to_unqualified(n) - operator = get_operator_by_name("ROUND") - return UnqualifiedOperation(operator, [self, n_unqualified]) + return UnqualifiedOperation(pydop.ROUND, [self, n_unqualified]) def __floor__(self): raise PyDoughUnqualifiedException( @@ -447,24 +442,23 @@ class UnqualifiedRoot(UnqualifiedNode): """ def __init__(self, graph: GraphMetadata): - self._parcel: tuple[GraphMetadata, set[str]] = ( + func_map: dict[str, pydop.PyDoughOperator] = {} + for operator_name, operator in pydop.builtin_registered_operators().items(): + if not isinstance(operator, pydop.BinaryOperator): + func_map[operator_name] = operator + for operator_name in graph.get_function_names(): + func_map[operator_name] = graph.get_function(operator_name) + self._parcel: tuple[GraphMetadata, dict[str, pydop.PyDoughOperator]] = ( graph, - { - operator_name - for operator_name, operator in pydop.builtin_registered_operators().items() - if not isinstance(operator, pydop.BinaryOperator) - } - | set(graph.get_function_names()), + func_map, ) def __getattribute__(self, name: str) -> Any: - if name 
in super(UnqualifiedNode, self).__getattribute__("_parcel")[1]: - graph: GraphMetadata = super(UnqualifiedNode, self).__getattribute__( - "_parcel" - )[0] - if name in graph.get_function_names(): - return UnqualifiedOperator(name, graph.get_function(name)) - return UnqualifiedOperator(name) + func_map: dict[str, pydop.PyDoughOperator] = super( + UnqualifiedNode, self + ).__getattribute__("_parcel")[1] + if name in func_map: + return UnqualifiedOperator(func_map[name]) else: return super().__getattribute__(name) @@ -593,68 +587,21 @@ class UnqualifiedOperator(UnqualifiedNode): yet to be called. """ - def __init__( - self, name: str, operator: pydop.ExpressionFunctionOperator | None = None - ): - self._parcel: tuple[str, pydop.ExpressionFunctionOperator | None] = ( - name, - operator, - ) + def __init__(self, operator: pydop.PyDoughOperator): + self._parcel: tuple[pydop.PyDoughOperator] = (operator,) def __call__(self, *args, **kwargs): - per: str | None = None - window_operator: pydop.ExpressionWindowOperator - is_window: bool = True - operands: list[UnqualifiedNode] = [] - func_str: str = self._parcel[0] - for arg in args: - operands.append(self.coerce_to_unqualified(arg)) - match func_str: - case "PERCENTILE": - window_operator = pydop.PERCENTILE - is_positive_int.verify( - kwargs.get("n_buckets", 100), "`n_buckets` argument" - ) - case "RANKING": - window_operator = pydop.RANKING - is_bool.verify(kwargs.get("allow_ties", False), "`allow_ties` argument") - is_bool.verify(kwargs.get("dense", False), "`dense` argument") - case "PREV" | "NEXT": - window_operator = pydop.PREV if func_str == "PREV" else pydop.NEXT - is_integer.verify(kwargs.get("n", 1), "`n` argument") - if len(args) > 1: - is_integer.verify(args[1], "`n` argument") - case "RELSUM": - window_operator = pydop.RELSUM - case "RELAVG": - window_operator = pydop.RELAVG - case "RELCOUNT": - window_operator = pydop.RELCOUNT - case "RELSIZE": - window_operator = pydop.RELSIZE - case func_str: - is_window = 
False - if self._parcel[1] is None: - operator = get_operator_by_name(func_str, **kwargs) - else: - operator = self._parcel[1] - if isinstance(operator, pydop.ExpressionWindowOperator): - window_operator = operator - is_window = True - if is_window: - by: Iterable[UnqualifiedNode] = get_by_arg(kwargs, window_operator) - if "per" in kwargs: - per_arg = kwargs.pop("per") - is_string.verify(per_arg, "`per` argument") - per = per_arg - return UnqualifiedWindow( - window_operator, - operands, - by, - per, - kwargs, + operands: list[UnqualifiedNode] = [ + self.coerce_to_unqualified(arg) for arg in args + ] + if isinstance(self._parcel[0], pydop.ExpressionWindowOperator): + return call_window_operator(self._parcel[0], operands, **kwargs) + elif isinstance(self._parcel[0], pydop.ExpressionFunctionOperator): + return call_function_operator(self._parcel[0], operands, **kwargs) + else: + raise NotImplementedError( + f"Unsupported operator type: {self._parcel[0].__class__.__name__}" ) - return UnqualifiedOperation(operator, operands) class UnqualifiedOperation(UnqualifiedNode): @@ -701,8 +648,10 @@ class UnqualifiedBinaryOperation(UnqualifiedNode): Variant of UnqualifiedOperation specifically for builtin Python binops. 
""" - def __init__(self, operator: str, lhs: UnqualifiedNode, rhs: UnqualifiedNode): - self._parcel: tuple[str, UnqualifiedNode, UnqualifiedNode] = ( + def __init__( + self, operator: pydop.BinaryOperator, lhs: UnqualifiedNode, rhs: UnqualifiedNode + ): + self._parcel: tuple[pydop.BinaryOperator, UnqualifiedNode, UnqualifiedNode] = ( operator, lhs, rhs, @@ -868,7 +817,7 @@ def display_raw(unqualified: UnqualifiedNode) -> str: case _: return repr(literal_value) case UnqualifiedOperator(): - return unqualified._parcel[0] + return repr(unqualified._parcel[0]) case UnqualifiedOperation(): operands_str = ", ".join( [display_raw(operand) for operand in unqualified._parcel[1]] @@ -885,7 +834,7 @@ def display_raw(unqualified: UnqualifiedNode) -> str: operands_str += f", {kwarg}={val!r}" return f"{unqualified._parcel[0].function_name}({operands_str})" case UnqualifiedBinaryOperation(): - return f"({display_raw(unqualified._parcel[1])} {unqualified._parcel[0]} {display_raw(unqualified._parcel[2])})" + return f"({display_raw(unqualified._parcel[1])} {unqualified._parcel[0].binop.value} {display_raw(unqualified._parcel[2])})" case UnqualifiedCollation(): method: str = "ASC" if unqualified._parcel[1] else "DESC" pos: str = "'last'" if unqualified._parcel[2] else "'first'" @@ -935,3 +884,67 @@ def display_raw(unqualified: UnqualifiedNode) -> str: raise PyDoughUnqualifiedException( f"Unsupported unqualified node: {unqualified.__class__.__name__}" ) + + +def call_function_operator( + operator: pydop.ExpressionFunctionOperator, + operands: list[UnqualifiedNode], + **kwargs, +) -> UnqualifiedNode: + """ + TODO + """ + + # Check if this is a keyword branching operator + if isinstance(operator, pydop.KeywordBranchingExpressionFunctionOperator): + # Find the matching implementation based on kwargs + impl: pydop.ExpressionFunctionOperator | None = ( + operator.find_matching_implementation(kwargs) + ) + if impl is None: + kwarg_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items()) + 
raise PyDoughUnqualifiedException( + f"No matching implementation found for {operator.function_name}({kwarg_str})." + ) + operator = impl + + # Otherwise, verify there are no keyword arguments + elif len(kwargs) > 0: + raise PyDoughUnqualifiedException( + f"PyDough function {operator.function_name} does not support " + "keyword arguments at this time." + ) + + return UnqualifiedOperation(operator, operands) + + +def call_window_operator( + operator: pydop.ExpressionWindowOperator, operands: list[UnqualifiedNode], **kwargs +) -> UnqualifiedNode: + """ + TODO + """ + match operator: + case pydop.PERCENTILE: + is_positive_int.verify(kwargs.get("n_buckets", 100), "`n_buckets` argument") + case pydop.RANKING: + is_bool.verify(kwargs.get("allow_ties", False), "`allow_ties` argument") + is_bool.verify(kwargs.get("dense", False), "`dense` argument") + case pydop.PREV | pydop.NEXT: + is_integer.verify(kwargs.get("n", 1), "`n` argument") + if len(operands) > 1: + is_integer.verify(operands[1], "`n` argument") + + by: Iterable[UnqualifiedNode] = get_by_arg(kwargs, operator) + per: str | None = None + if "per" in kwargs: + per_arg = kwargs.pop("per") + is_string.verify(per_arg, "`per` argument") + per = per_arg + return UnqualifiedWindow( + operator, + operands, + by, + per, + kwargs, + ) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index c244afae3..f566a2e45 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & ABSENT(n_rows), columns={}) + FILTER(condition=ABSENT(n_rows) & c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), columns={}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 26542bd19..fe91ae9f3 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]) & ABSENT(expr_0), columns={}) + FILTER(condition=ABSENT(expr_0) & o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]), columns={}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_relational.py b/tests/test_relational.py index 524c7504c..12cc43777 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -528,7 +528,7 @@ def test_invalid_limit(literal: LiteralExpression) -> None: ) }, ), - "AGGREGATE(keys={'a': Column(name=a, type=UnknownType())}, aggregations={'b': Call(op=Function[SUM], inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", + "AGGREGATE(keys={'a': Column(name=a, type=UnknownType())}, aggregations={'b': Call(op=SUM, inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", id="key_and_agg", 
), pytest.param( @@ -556,7 +556,7 @@ def test_invalid_limit(literal: LiteralExpression) -> None: ), }, ), - "AGGREGATE(keys={}, aggregations={'a': Call(op=Function[SUM], inputs=[Column(name=a, type=NumericType())], return_type=NumericType()), 'b': Call(op=Function[SUM], inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", + "AGGREGATE(keys={}, aggregations={'a': Call(op=SUM, inputs=[Column(name=a, type=NumericType())], return_type=NumericType()), 'b': Call(op=SUM, inputs=[Column(name=b, type=NumericType())], return_type=NumericType())})", id="no_keys", ), pytest.param( diff --git a/tests/test_relational_expressions.py b/tests/test_relational_expressions.py index c7137a199..535af525b 100644 --- a/tests/test_relational_expressions.py +++ b/tests/test_relational_expressions.py @@ -243,12 +243,12 @@ def test_expression_sort_info_equals( [ pytest.param( CallExpression(LOWER, StringType(), [ColumnReference("a", StringType())]), - "Call(op=Function[LOWER], inputs=[Column(name=a, type=StringType())], return_type=StringType())", + "Call(op=LOWER, inputs=[Column(name=a, type=StringType())], return_type=StringType())", id="lower", ), pytest.param( CallExpression(SUM, NumericType(), [ColumnReference("a", NumericType())]), - "Call(op=Function[SUM], inputs=[Column(name=a, type=NumericType())], return_type=NumericType())", + "Call(op=SUM, inputs=[Column(name=a, type=NumericType())], return_type=NumericType())", id="sum", ), ], diff --git a/tests/test_unqualified_node.py b/tests/test_unqualified_node.py index 1e04d6d29..7c527a44e 100644 --- a/tests/test_unqualified_node.py +++ b/tests/test_unqualified_node.py @@ -740,8 +740,7 @@ def test_init_pydough_context( pytest.param( bad_unsupported_kwarg3, re.escape( - "PyDough function call SUM does not support " - "keyword arguments at this time." + "PyDough function SUM does not support keyword arguments at this time." 
), id="bad_unsupported_kwarg3", ), From 9f0961d0fd677f60e026d10d12ed8f648c10f35f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 12:49:29 -0400 Subject: [PATCH 53/97] Moved function mismatch errors to use min edit distance --- pydough/errors/error_utils.py | 124 +++++++++++++++++ pydough/errors/pydough_error_builder.py | 49 ++++++- .../pydough_operators/operator_registry.py | 24 +--- pydough/qdag/collections/collection_qdag.py | 127 +----------------- pydough/unqualified/unqualified_node.py | 5 +- tests/test_metadata_errors.py | 2 +- tests/test_qualification_errors.py | 12 +- 7 files changed, 195 insertions(+), 148 deletions(-) diff --git a/pydough/errors/error_utils.py b/pydough/errors/error_utils.py index b679fa217..84142b73b 100644 --- a/pydough/errors/error_utils.py +++ b/pydough/errors/error_utils.py @@ -19,6 +19,7 @@ "extract_integer", "extract_object", "extract_string", + "find_possible_name_matches", "is_bool", "is_integer", "is_json_array", @@ -33,6 +34,8 @@ from abc import ABC, abstractmethod +import numpy as np + from .error_types import PyDoughMetadataException ############################################################################### @@ -443,3 +446,124 @@ def extract_object(json_obj: dict, key_name: str, obj_name: str) -> dict: value = json_obj[key_name] assert isinstance(value, dict) return value + + +############################################################################### +# Name Suggestion Utilities +############################################################################### + + +def min_edit_distance(s: str, t: str) -> float: + """ + Computes the minimum edit distance between two strings using the + Levenshtein distance algorithm. Substituting a character for the same + character with different capitalization is considered 10% of the edit + cost of replacing it with any other character. For this implementation + the iterative with a 2-row array is used to save memory. 
+ Link: + https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows + + Args: + `s`: The first string. + `t`: The second string. + + Returns: + The minimum edit distance between the two strings. + """ + # Ensures str1 is the shorter string + if len(s) > len(t): + s, t = t, s + m, n = len(s), len(t) + + # Use a 2 x (m + 1) array to represent an n x (m + 1) array since you only + # need to consider the previous row to generate the next row, therefore the + # same two rows can be recycled + + row, previousRow = 1, 0 + arr = np.zeros((2, m + 1), dtype=float) + + # MED(X, "") = len(X) + arr[0, :] = np.arange(m + 1) + + for i in range(1, n + 1): + # MED("", X) = len(X) + arr[row, 0] = i + + # Loop over the rest of s to see if it matches with the corresponding + # letter of t + for j in range(1, m + 1): + substitution_cost: float + + if s[j - 1] == t[i - 1]: + substitution_cost = 0.0 + elif s[j - 1].lower() == t[i - 1].lower(): + substitution_cost = 0.1 + else: + substitution_cost = 1.0 + + arr[row, j] = min( + arr[row, j - 1] + 1.0, + arr[previousRow, j] + 1.0, + arr[previousRow, j - 1] + substitution_cost, + ) + + row, previousRow = previousRow, row + + return arr[previousRow, m] # Return the last computed row's last element + + +def find_possible_name_matches( + term_name: str, candidates: set[str], atol: int, rtol: float, min_names: int +) -> list[str]: + """ + Finds and returns a list of candidate names that closely match the + given name based on minimum edit distance. + + Args: + `term_name`: The name to match against the list of candidates. + `candidates`: A set of candidate names to search for matches. + `atol`: The absolute tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match + atol` will be included in the results. 
+ `rtol`: The relative tolerance for the minimum edit distance; any + candidate with a minimum edit distance less than or equal to + `closest_match * (1 + rtol)` will be included in the results. + `min_names`: The minimum number of names to return. + + Returns: + A list of candidate names, based on the closest matches. + """ + + terms_distance_list: list[tuple[float, str]] = [] + + for term in candidates: + # get the minimum edit distance + me: float = min_edit_distance(term_name, term) + terms_distance_list.append((me, term)) + + if terms_distance_list == []: + return [] + # sort the list by minimum edit distance break ties by name + terms_distance_list.sort() + + closest_match = terms_distance_list[0] + + # List with all names that have a me <= closest_match + atol + matches_within_atol: list[str] = [ + name for me, name in terms_distance_list if me <= closest_match[0] + atol + ] + + # List with all names that have a me <= closest_match * 1.1 + matches_within_rtol: list[str] = [ + name for me, name in terms_distance_list if me <= closest_match[0] * (1 + rtol) + ] + + # List with the top 3 closest matches (me) breaking ties by name + min_matches: list[str] = [name for _, name in terms_distance_list[:min_names]] + + # Return whichever of the three lists is the longest, breaking ties + # lexicographically by the names within. 
+ return max( + [matches_within_atol, matches_within_rtol, min_matches], + key=lambda x: (len(x), x), + ) diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index 0a560b520..c9c0fbbfd 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -4,12 +4,19 @@ from typing import TYPE_CHECKING -from pydough.errors import PyDoughException, PyDoughQDAGException, PyDoughSQLException +from pydough.errors import ( + PyDoughException, + PyDoughQDAGException, + PyDoughSQLException, + PyDoughUnqualifiedException, +) +from pydough.errors.error_utils import find_possible_name_matches if TYPE_CHECKING: from pydough.pydough_operators import PyDoughOperator from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG from pydough.relational import CallExpression + from pydough.unqualified import UnqualifiedNode class PyDoughErrorBuilder: @@ -230,3 +237,43 @@ def sql_call_conversion_error( return PyDoughQDAGException( f"Failed to convert expression {call.to_string(True)} to SQL: {error}" ) + + def undefined_function_call( + self, node: "UnqualifiedNode", *args, **kwargs + ) -> PyDoughException: + """ + Creates an exception for when a function call is made on an unqualified + node that is not callable. + + Args: + `node`: The unqualified node that was called as if it were a + function. + `*args`: Positional arguments passed to the call. + `**kwargs`: Keyword arguments passed to the call. + + Returns: + An exception indicating that the node is not callable. + """ + from pydough.unqualified import UnqualifiedAccess, UnqualifiedRoot + + error_message: str = f"PyDough object {node!r} is not callable." + # If in the form root.XXX, then it is possible that XXXX is a typo of + # a function name. 
+ if isinstance(node, UnqualifiedAccess) and isinstance( + node._parcel[0], UnqualifiedRoot + ): + suggestions: list[str] = find_possible_name_matches( + term_name=node._parcel[1], + candidates=set(node._parcel[0]._parcel[1]), + atol=1, + rtol=0.1, + min_names=3, + ) + + # Check if there are any suggestions to add + if len(suggestions) > 0: + suggestions_str: str = ", ".join(suggestions) + error_message += f" Did you mean: {suggestions_str}?" + else: + error_message += " Did you mean to use a function?" + return PyDoughUnqualifiedException(error_message) diff --git a/pydough/pydough_operators/operator_registry.py b/pydough/pydough_operators/operator_registry.py index dd88b02bc..779f9acb4 100644 --- a/pydough/pydough_operators/operator_registry.py +++ b/pydough/pydough_operators/operator_registry.py @@ -27,36 +27,24 @@ def builtin_registered_operators() -> dict[str, PyDoughOperator]: return operators -def get_operator_by_name(name: str, **kwargs) -> ExpressionFunctionOperator: +def get_operator_by_name(name: str) -> ExpressionFunctionOperator: """ Retrieves a registered PyDough operator by its a name. - This function searches for an operator within the registered expression - operators. If the operator is a `KeywordBranchingExpressionFunctionOperator`, - it will attempt to find a specific implementation that matches the provided - keyword arguments. - Args: name: The name of the operator to retrieve. - **kwargs: Keyword arguments that may be used to select a specific - implementation if the operator is a - `KeywordBranchingExpressionFunctionOperator`. Returns: - The `ExpressionFunctionOperator` corresponding to the given name and - keyword arguments. + The `ExpressionFunctionOperator` corresponding to the given name. 
Raises: - PyDoughUnqualifiedException: If the operator with the given name is - not found, or if no matching implementation is found for a - `KeywordBranchingExpressionFunctionOperator` with the provided - keyword arguments, or if keyword arguments are provided for an - operator that does not support them. + `PyDoughUnqualifiedException`: If the operator with the given name is + not found. """ # Find the operator directly using inspect for op_name, obj in inspect.getmembers(REP): if op_name == name and op_name in REP.__all__ and obj.public: return obj - else: - raise PyDoughUnqualifiedException(f"Operator {name} not found.") + # If not found, raise an exception + raise PyDoughUnqualifiedException(f"Operator {name} not found.") diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index a68ba1468..e5f16077b 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -10,9 +10,8 @@ from functools import cache, cached_property from typing import Union -import numpy as np - import pydough +from pydough.errors.error_utils import find_possible_name_matches from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG from pydough.qdag.expressions.collation_expression import CollationExpression from pydough.qdag.expressions.expression_qdag import PyDoughExpressionQDAG @@ -357,122 +356,6 @@ def to_tree_string(self) -> str: """ return "\n".join(self.to_tree_form(True).to_string_rows()) - def find_possible_name_matches( - self, term_name: str, atol: int, rtol: float, min_names: int - ) -> list[str]: - """ - Finds and returns a list of candidate names that closely match the - given name based on minimum edit distance. - - Args: - `term_name`: The name to match against the list of candidates. - `atol`: The absolute tolerance for the minimum edit distance; any - candidate with a minimum edit distance less than or equal to - `closest_match + atol` will be included in the results. 
- `rtol`: The relative tolerance for the minimum edit distance; any - candidate with a minimum edit distance less than or equal to - `closest_match * (1 + rtol)` will be included in the results. - `min_names`: The minimum number of names to return. - - Returns: - A list of candidate names, based on the closest matches. - """ - - terms_distance_list: list[tuple[float, str]] = [] - - for term in self.all_terms: - # get the minimum edit distance - me: float = self.min_edit_distance(term_name, term) - terms_distance_list.append((me, term)) - - if terms_distance_list == []: - return [] - # sort the list by minimum edit distance break ties by name - terms_distance_list.sort() - - closest_match = terms_distance_list[0] - - # List with all names that have a me <= closest_match + atol - matches_within_atol: list[str] = [ - name for me, name in terms_distance_list if me <= closest_match[0] + atol - ] - - # List with all names that have a me <= closest_match * 1.1 - matches_within_rtol: list[str] = [ - name - for me, name in terms_distance_list - if me <= closest_match[0] * (1 + rtol) - ] - - # List with the top 3 closest matches (me) breaking ties by name - min_matches: list[str] = [name for _, name in terms_distance_list[:min_names]] - - # Return whichever of the three lists is the longest, breaking ties - # lexicographically by the names within. - return max( - [matches_within_atol, matches_within_rtol, min_matches], - key=lambda x: (len(x), x), - ) - - @staticmethod - def min_edit_distance(s: str, t: str) -> float: - """ - Computes the minimum edit distance between two strings using the - Levenshtein distance algorithm. Substituting a character for the same - character with different capitalization is considered 10% of the edit - cost of replacing it with any other character. For this implementation - the iterative with a 2-row array is used to save memory. 
- Link: - https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows - - Args: - `s`: The first string. - `t`: The second string. - - Returns: - The minimum edit distance between the two strings. - """ - # Ensures str1 is the shorter string - if len(s) > len(t): - s, t = t, s - m, n = len(s), len(t) - - # Use a 2 x (m + 1) array to represent an n x (m + 1) array since you only - # need to consider the previous row to generate the next row, therefore the - # same two rows can be recycled - - row, previousRow = 1, 0 - arr = np.zeros((2, m + 1), dtype=float) - - # MED(X, "") = len(X) - arr[0, :] = np.arange(m + 1) - - for i in range(1, n + 1): - # MED("", X) = len(X) - arr[row, 0] = i - - # Loop over the rest of s to see if it matches with the corresponding - # letter of t - for j in range(1, m + 1): - substitution_cost: float - - if s[j - 1] == t[i - 1]: - substitution_cost = 0.0 - elif s[j - 1].lower() == t[i - 1].lower(): - substitution_cost = 0.1 - else: - substitution_cost = 1.0 - - arr[row, j] = min( - arr[row, j - 1] + 1.0, - arr[previousRow, j] + 1.0, - arr[previousRow, j - 1] + substitution_cost, - ) - - row, previousRow = previousRow, row - - return arr[previousRow, m] # Return the last computed row's last element - def name_mismatch_error( self, term_name: str, atol: int = 2, rtol: float = 0.1, min_names: int = 3 ) -> str: @@ -496,8 +379,12 @@ def name_mismatch_error( """ error_message: str = f"Unrecognized term of {self.to_string()}: {term_name!r}." 
- suggestions: list[str] = self.find_possible_name_matches( - term_name=term_name, atol=atol, rtol=rtol, min_names=min_names + suggestions: list[str] = find_possible_name_matches( + term_name=term_name, + candidates=self.all_terms, + atol=atol, + rtol=rtol, + min_names=min_names, ) # Check if there are any suggestions to add diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 0825d9f05..4dc1f57cc 100644 --- a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -26,6 +26,7 @@ from datetime import date, datetime from typing import Any, Union +import pydough import pydough.pydough_operators as pydop from pydough.errors import PyDoughUnqualifiedException from pydough.errors.error_utils import is_bool, is_integer, is_positive_int, is_string @@ -131,8 +132,8 @@ def __getitem__(self, key): ) def __call__(self, *args, **kwargs): - raise PyDoughUnqualifiedException( - f"PyDough nodes {self!r} is not callable. Did you mean to use a function?" + raise pydough.active_session.error_builder.undefined_function_call( + self, *args, **kwargs ) def __bool__(self): diff --git a/tests/test_metadata_errors.py b/tests/test_metadata_errors.py index 3a873fc4d..e079b59e3 100644 --- a/tests/test_metadata_errors.py +++ b/tests/test_metadata_errors.py @@ -765,7 +765,7 @@ def test_invalid_graphs( ), pytest.param( "parent.sub4", - "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough nodes is_prime is not callable. Did you mean to use a function?)", + "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough object is_prime is not callable. 
Did you mean: DATETIME, SLICE, ISIN, STRING, STRIP, DAYNAME, KEEP_IF, LIKE, QUANTILE, RELSIZE, REPLACE, SIGN, SUM, PREV, SQRT?)", id="bad_syntax_3", ), pytest.param( diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index e40cb94c0..8d7da66f0 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -29,7 +29,7 @@ ), pytest.param( "result = nations.CALCULATE(nation_name=FIZZBUZZ(name))", - "PyDough nodes FIZZBUZZ is not callable. Did you mean to use a function?", + "PyDough object FIZZBUZZ is not callable. Did you mean: FIND, MINUTE, ABS, COUNT, FLOAT, FLOOR, HOUR, IFF, INTEGER, ISIN, LIKE, MIN, RELCOUNT, RELSIZE, RELSUM, ROUND, SIGN, STRCOUNT, SUM?", id="non_function", ), pytest.param( @@ -54,7 +54,7 @@ ), pytest.param( "lines.CALCULATE(v=MUL(extended_price, SUB(1, discount)))", - "PyDough nodes SUB is not callable. Did you mean to use a function?", + "PyDough object SUB is not callable. Did you mean: SUM, STD, ABS?", id="binop_function_call", ), pytest.param( @@ -122,22 +122,22 @@ ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", - "PyDough nodes SAMPLE_VAR is not callable. Did you mean to use a function?", + "PyDough object SAMPLE_VAR is not callable. Did you mean: MEDIAN, PREV, SMALLEST, UPPER, VAR, YEAR, ABSENT, AVG, DATEDIFF, DATETIME, FLOAT, FLOOR, GETPART, INTEGER, KEEP_IF, LARGEST, LENGTH, LOWER, LPAD, MAX, POWER, PRESENT, QUARTER, RELAVG, REPLACE, RPAD, SECOND, SLICE, SUM?", id="kwargfunc_1", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", - "PyDough nodes SAMPLE_VARIANCE is not callable. Did you mean to use a function?", + "PyDough object SAMPLE_VARIANCE is not callable. 
Did you mean: MEDIAN, REPLACE, SLICE, DATETIME, JOIN_STRINGS, STRING?", id="kwargfunc_2", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_STD(suppliers.account_balance))", - "PyDough nodes SAMPLE_STD is not callable. Did you mean to use a function?", + "PyDough object SAMPLE_STD is not callable. Did you mean: SMALLEST, LARGEST, ABSENT?", id="kwargfunc_3", ), pytest.param( "result = nations.CALCULATE(name=name, std=POPULATION_STD(suppliers.account_balance))", - "PyDough nodes POPULATION_STD is not callable. Did you mean to use a function?", + "PyDough object POPULATION_STD is not callable. Did you mean: CONTAINS, COUNT, DEFAULT_TO, JOIN_STRINGS, LARGEST, MONOTONIC, NDISTINCT, ROUND?", id="kwargfunc_4", ), pytest.param( From 2e3030000d78305dd5a79c377025eb370ab94634 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 13:55:19 -0400 Subject: [PATCH 54/97] Adjusting tuning of min edit distance errors --- pydough/errors/error_utils.py | 50 +++++++++++++++------ pydough/errors/pydough_error_builder.py | 17 ++++++- pydough/qdag/collections/collection_qdag.py | 21 ++++++++- tests/test_exploration.py | 4 +- tests/test_metadata_errors.py | 2 +- tests/test_qdag_collection_errors.py | 6 +-- tests/test_qualification_errors.py | 14 +++--- 7 files changed, 85 insertions(+), 29 deletions(-) diff --git a/pydough/errors/error_utils.py b/pydough/errors/error_utils.py index 84142b73b..316cc7afa 100644 --- a/pydough/errors/error_utils.py +++ b/pydough/errors/error_utils.py @@ -453,7 +453,14 @@ def extract_object(json_obj: dict, key_name: str, obj_name: str) -> dict: ############################################################################### -def min_edit_distance(s: str, t: str) -> float: +def min_edit_distance( + s: str, + t: str, + insert_cost: float, + delete_cost: float, + substitution_cost: float, + capital_cost: float, +) -> float: """ Computes the minimum edit distance between two strings using the Levenshtein distance algorithm. 
Substituting a character for the same @@ -466,13 +473,15 @@ def min_edit_distance(s: str, t: str) -> float: Args: `s`: The first string. `t`: The second string. + `insert_cost`: The cost of inserting a character into the first string. + `delete_cost`: The cost of deleting a character from the first string. + `substitution_cost`: The cost of substituting a character. + `capital_cost`: The cost of substituting a character with the same + character with different capitalization. Returns: The minimum edit distance between the two strings. """ - # Ensures str1 is the shorter string - if len(s) > len(t): - s, t = t, s m, n = len(s), len(t) # Use a 2 x (m + 1) array to represent an n x (m + 1) array since you only @@ -492,19 +501,19 @@ def min_edit_distance(s: str, t: str) -> float: # Loop over the rest of s to see if it matches with the corresponding # letter of t for j in range(1, m + 1): - substitution_cost: float + sub_cost: float if s[j - 1] == t[i - 1]: - substitution_cost = 0.0 + sub_cost = 0.0 elif s[j - 1].lower() == t[i - 1].lower(): - substitution_cost = 0.1 + sub_cost = capital_cost else: - substitution_cost = 1.0 + sub_cost = substitution_cost arr[row, j] = min( - arr[row, j - 1] + 1.0, - arr[previousRow, j] + 1.0, - arr[previousRow, j - 1] + substitution_cost, + arr[row, j - 1] + insert_cost, + arr[previousRow, j] + delete_cost, + arr[previousRow, j - 1] + sub_cost, ) row, previousRow = previousRow, row @@ -513,7 +522,15 @@ def min_edit_distance(s: str, t: str) -> float: def find_possible_name_matches( - term_name: str, candidates: set[str], atol: int, rtol: float, min_names: int + term_name: str, + candidates: set[str], + atol: int, + rtol: float, + min_names: int, + insert_cost: float, + delete_cost: float, + substitution_cost: float, + capital_cost: float, ) -> list[str]: """ Finds and returns a list of candidate names that closely match the @@ -529,6 +546,11 @@ def find_possible_name_matches( candidate with a minimum edit distance less than or equal to 
`closest_match * (1 + rtol)` will be included in the results. `min_names`: The minimum number of names to return. + `insert_cost`: The cost of inserting a character into the first string. + `delete_cost`: The cost of deleting a character from the first string. + `substitution_cost`: The cost of substituting a character. + `capital_cost`: The cost of substituting a character with the same + character with different capitalization. Returns: A list of candidate names, based on the closest matches. @@ -538,7 +560,9 @@ def find_possible_name_matches( for term in candidates: # get the minimum edit distance - me: float = min_edit_distance(term_name, term) + me: float = min_edit_distance( + term_name, term, insert_cost, delete_cost, substitution_cost, capital_cost + ) terms_distance_list.append((me, term)) if terms_distance_list == []: diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index c9c0fbbfd..9582f0295 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -42,7 +42,16 @@ def term_not_found( An exception indicating that the term was not found. 
""" return PyDoughQDAGException( - collection.name_mismatch_error(term_name, atol=2, rtol=0.1, min_names=3) + collection.name_mismatch_error( + term_name, + atol=2, + rtol=0.1, + min_names=3, + insert_cost=0.5, + delete_cost=1.0, + substitution_cost=1.0, + capital_cost=0.1, + ) ) def down_streaming_conflict( @@ -265,9 +274,13 @@ def undefined_function_call( suggestions: list[str] = find_possible_name_matches( term_name=node._parcel[1], candidates=set(node._parcel[0]._parcel[1]), - atol=1, + atol=2, rtol=0.1, min_names=3, + insert_cost=0.5, + delete_cost=1.0, + substitution_cost=1.0, + capital_cost=0.1, ) # Check if there are any suggestions to add diff --git a/pydough/qdag/collections/collection_qdag.py b/pydough/qdag/collections/collection_qdag.py index e5f16077b..ede9ab58b 100644 --- a/pydough/qdag/collections/collection_qdag.py +++ b/pydough/qdag/collections/collection_qdag.py @@ -357,7 +357,15 @@ def to_tree_string(self) -> str: return "\n".join(self.to_tree_form(True).to_string_rows()) def name_mismatch_error( - self, term_name: str, atol: int = 2, rtol: float = 0.1, min_names: int = 3 + self, + term_name: str, + atol: int = 2, + rtol: float = 0.1, + min_names: int = 3, + insert_cost: float = 1.0, + delete_cost: float = 1.0, + substitution_cost: float = 1.0, + capital_cost: float = 0.1, ) -> str: """ Raises a name mismatch error with suggestions if possible. @@ -373,6 +381,13 @@ def name_mismatch_error( names with a minimum edit distance less than or equal to `closest_match * (1 + rtol)` will be included as a suggestion. `min_names`: The minimum number of suggestions to include. + `insert_cost`: The cost of inserting a character into the first + string. + `delete_cost`: The cost of deleting a character from the first + string. + `substitution_cost`: The cost of substituting a character. + `capital_cost`: The cost of substituting a character with the same + character with different capitalization. 
Returns: A string describing the error, including suggestions if available. @@ -385,6 +400,10 @@ def name_mismatch_error( atol=atol, rtol=rtol, min_names=min_names, + insert_cost=insert_cost, + delete_cost=delete_cost, + substitution_cost=substitution_cost, + capital_cost=capital_cost, ) # Check if there are any suggestions to add diff --git a/tests/test_exploration.py b/tests/test_exploration.py index e74e11ef1..1dd10ef21 100644 --- a/tests/test_exploration.py +++ b/tests/test_exploration.py @@ -1222,13 +1222,13 @@ def test_graph_structure( "TPCH", contextless_collections_impl, """ -Unrecognized term of TPCH: 'line_items'. Did you mean: lines, nations, regions, suppliers? +Unrecognized term of TPCH: 'line_items'. Did you mean: lines, parts, regions? This could mean you accessed a property using a name that does not exist, or that you need to place your PyDough code into a context for it to make sense. Did you mean to use pydough.explain_term? """, """ -Unrecognized term of TPCH: 'line_items'. Did you mean: lines, nations, regions, suppliers? +Unrecognized term of TPCH: 'line_items'. Did you mean: lines, parts, regions? This could mean you accessed a property using a name that does not exist, or that you need to place your PyDough code into a context for it to make sense. Did you mean to use pydough.explain_term? diff --git a/tests/test_metadata_errors.py b/tests/test_metadata_errors.py index e079b59e3..3214fb8fb 100644 --- a/tests/test_metadata_errors.py +++ b/tests/test_metadata_errors.py @@ -765,7 +765,7 @@ def test_invalid_graphs( ), pytest.param( "parent.sub4", - "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough object is_prime is not callable. Did you mean: DATETIME, SLICE, ISIN, STRING, STRIP, DAYNAME, KEEP_IF, LIKE, QUANTILE, RELSIZE, REPLACE, SIGN, SUM, PREV, SQRT?)", + "Malformed general join condition: 'is_prime(self.j1) != is_prime(self.j2)' (PyDough object is_prime is not callable. 
Did you mean: ISIN, LIKE, SUM, SLICE, STRIP, IFF, MIN, VAR, PREV, SIGN, SQRT, STRING, ABS, CEIL, FIND, HAS, HOUR, LPAD, RPAD, STD, YEAR, UPPER, DATETIME?)", id="bad_syntax_3", ), pytest.param( diff --git a/tests/test_qdag_collection_errors.py b/tests/test_qdag_collection_errors.py index 3ba8a92dd..7c0d9c72c 100644 --- a/tests/test_qdag_collection_errors.py +++ b/tests/test_qdag_collection_errors.py @@ -29,12 +29,12 @@ [ pytest.param( TableCollectionInfo("Rainbows"), - "Unrecognized term of TPCH: 'Rainbows'. Did you mean: lines, nations, regions, parts, orders?", + "Unrecognized term of TPCH: 'Rainbows'. Did you mean: lines, nations, regions, parts", id="table_dne", ), pytest.param( TableCollectionInfo("regions") ** SubCollectionInfo("postage_stamps"), - "Unrecognized term of TPCH.regions: 'postage_stamps'. Did you mean: comment, nations, name, key?", + "Unrecognized term of TPCH.regions: 'postage_stamps'. Did you mean: name, comment, key, nations?", id="subcollection_dne", ), pytest.param( @@ -47,7 +47,7 @@ TableCollectionInfo("nations") ** SubCollectionInfo("suppliers") ** CalculateInfo([], foo=ReferenceInfo("region_key")), - "Unrecognized term of TPCH.nations.suppliers: 'region_key'. Did you mean: nation_key, key, lines?", + "Unrecognized term of TPCH.nations.suppliers: 'region_key'. Did you mean: nation_key, key, lines, phone, nation?", id="reference_bad_ancestry", ), pytest.param( diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index 8d7da66f0..de266738c 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -24,12 +24,12 @@ [ pytest.param( "result = nations.CALCULATE(nation_name=name, total_balance=SUM(account_balance))", - "Unrecognized term of TPCH.nations: 'account_balance'. Did you mean: comment, customers, name, region_key, suppliers, region?", + "Unrecognized term of TPCH.nations: 'account_balance'. 
Did you mean: name, comment, key, region, customers, region_key?", id="bad_name", ), pytest.param( "result = nations.CALCULATE(nation_name=FIZZBUZZ(name))", - "PyDough object FIZZBUZZ is not callable. Did you mean: FIND, MINUTE, ABS, COUNT, FLOAT, FLOOR, HOUR, IFF, INTEGER, ISIN, LIKE, MIN, RELCOUNT, RELSIZE, RELSUM, ROUND, SIGN, STRCOUNT, SUM?", + "PyDough object FIZZBUZZ is not callable. Did you mean: FIND, ABS, MIN, SUM, HOUR, IFF, LIKE, MINUTE, SIGN, AVG, CEIL, COUNT, DAY, FLOAT, FLOOR, HAS, ISIN, MAX, NOT, ROUND, STD, VAR, LPAD, NEXT, PREV, RELSUM, RPAD, SLICE, SQRT, YEAR?", id="non_function", ), pytest.param( @@ -54,7 +54,7 @@ ), pytest.param( "lines.CALCULATE(v=MUL(extended_price, SUB(1, discount)))", - "PyDough object SUB is not callable. Did you mean: SUM, STD, ABS?", + "PyDough object SUB is not callable. Did you mean: SUM, STD, ABS, AVG, DAY, HAS, HOUR, IFF, ISIN, MAX, MIN, NOT, SIGN, SQRT, VAR?", id="binop_function_call", ), pytest.param( @@ -122,22 +122,22 @@ ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", - "PyDough object SAMPLE_VAR is not callable. Did you mean: MEDIAN, PREV, SMALLEST, UPPER, VAR, YEAR, ABSENT, AVG, DATEDIFF, DATETIME, FLOAT, FLOOR, GETPART, INTEGER, KEEP_IF, LARGEST, LENGTH, LOWER, LPAD, MAX, POWER, PRESENT, QUARTER, RELAVG, REPLACE, RPAD, SECOND, SLICE, SUM?", + "PyDough object SAMPLE_VAR is not callable. Did you mean: YEAR, SUM, UPPER, VAR, AVG, LPAD, PREV, RPAD, DAY, FLOAT, FLOOR, HAS, LOWER, MAX, POWER, SLICE, SMALLEST, SQRT, STD, ABS, CEIL, GETPART, HOUR, LIKE, MEDIAN, NEXT, QUARTER, RELAVG, REPLACE, SECOND, SIGN, STRIP?", id="kwargfunc_1", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", - "PyDough object SAMPLE_VARIANCE is not callable. Did you mean: MEDIAN, REPLACE, SLICE, DATETIME, JOIN_STRINGS, STRING?", + "PyDough object SAMPLE_VARIANCE is not callable. 
Did you mean: SLICE, REPLACE, MEDIAN, SIGN, STRING, YEAR, ISIN, MIN, STRIP, SUM, UPPER, VAR?", id="kwargfunc_2", ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_STD(suppliers.account_balance))", - "PyDough object SAMPLE_STD is not callable. Did you mean: SMALLEST, LARGEST, ABSENT?", + "PyDough object SAMPLE_STD is not callable. Did you mean: SMALLEST, STD, HAS, LARGEST, SUM, ABS, LPAD, NEXT, RPAD, SECOND, SQRT, ABSENT, DAY, FLOAT, MAX, NOT, SLICE, UPPER, VAR?", id="kwargfunc_3", ), pytest.param( "result = nations.CALCULATE(name=name, std=POPULATION_STD(suppliers.account_balance))", - "PyDough object POPULATION_STD is not callable. Did you mean: CONTAINS, COUNT, DEFAULT_TO, JOIN_STRINGS, LARGEST, MONOTONIC, NDISTINCT, ROUND?", + "COUNT, ROUND, CONTAINS, FIND, LPAD, RPAD, FLOAT, HAS, MIN, MONTH, NOT, STD, HASNOT, HOUR, ISIN, MINUTE, SECOND, SIGN, ABS, DAY, DEFAULT_TO, FLOOR, LARGEST, MAX, MONOTONIC, NDISTINCT, POWER, PRESENT, QUANTILE, RELCOUNT, REPLACE, SLICE, STRING, SUM, VAR?", id="kwargfunc_4", ), pytest.param( From 75c3b7c06cfee0ff85c5fc7a42bcaf4cfdf494c6 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 14:22:21 -0400 Subject: [PATCH 55/97] Messing with function handling, VARIANCE name, error tuning --- pydough/pydough_operators/__init__.py | 8 +-- .../expression_operators/README.md | 6 +-- .../expression_operators/__init__.py | 8 +-- .../registered_expression_operators.py | 8 +-- .../collections/augmenting_child_operator.py | 1 - .../base_transform_bindings.py | 4 +- tests/test_pipeline_tpch_custom.py | 49 ++++++++++--------- tests/test_plan_refsols/simple_var_std.txt | 2 +- .../simple_var_std_with_nulls.txt | 2 +- .../all_pydough_functions_dialects.py | 6 ++- tests/test_qualification_errors.py | 4 +- .../aggregation_functions_ansi.sql | 10 ++-- .../aggregation_functions_sqlite.sql | 43 ++++++++++++---- 13 files changed, 91 insertions(+), 60 deletions(-) diff --git a/pydough/pydough_operators/__init__.py 
b/pydough/pydough_operators/__init__.py index f3194928d..42f7f384e 100644 --- a/pydough/pydough_operators/__init__.py +++ b/pydough/pydough_operators/__init__.py @@ -67,7 +67,7 @@ "NOT", "PERCENTILE", "POPULATION_STD", - "POPULATION_VARIANCE", + "POPULATION_VAR", "POW", "POWER", "PRESENT", @@ -89,7 +89,7 @@ "RequireMinArgs", "RequireNumArgs", "SAMPLE_STD", - "SAMPLE_VARIANCE", + "SAMPLE_VAR", "SECOND", "SIGN", "SLICE", @@ -171,7 +171,7 @@ NOT, PERCENTILE, POPULATION_STD, - POPULATION_VARIANCE, + POPULATION_VAR, POW, POWER, PRESENT, @@ -187,7 +187,7 @@ ROUND, RPAD, SAMPLE_STD, - SAMPLE_VARIANCE, + SAMPLE_VAR, SECOND, SIGN, SLICE, diff --git a/pydough/pydough_operators/expression_operators/README.md b/pydough/pydough_operators/expression_operators/README.md index 6bd9cd368..f3a7a4b98 100644 --- a/pydough/pydough_operators/expression_operators/README.md +++ b/pydough/pydough_operators/expression_operators/README.md @@ -22,7 +22,7 @@ The expression_operators module provides functionality to define and manage vari ### [keyword_branching_operators.py](keyword_branching_operators.py) -- `KeywordBranchingExpressionFunctionOperator`: Implementation class for PyDough operators that return an `ExpressionFunctionOperator` and represent a function call that supports keyword arguments, such as `VAR` or `STD`. For example, `VAR` can be set with the keyword argument `type="population"` or `type="sample"`, thereby creating two different operators, `POPULATION_VARIANCE` and `SAMPLE_VARIANCE`. +- `KeywordBranchingExpressionFunctionOperator`: Implementation class for PyDough operators that return an `ExpressionFunctionOperator` and represent a function call that supports keyword arguments, such as `VAR` or `STD`. For example, `VAR` can be set with the keyword argument `type="population"` or `type="sample"`, thereby creating two different operators, `POPULATION_VAR` and `SAMPLE_VAR`. 
### [binary_operators.py](binary_operators.py) @@ -145,9 +145,9 @@ These functions can be called on plural data to aggregate it into a singular exp - `NDISTINCT`: counts how many unique values exist in a plural expression (special: see collection aggregations). - `VAR`: the basic operation for variance, which is used to create the other variance functions with different types of keyword arguments. Note: `VAR` is not a valid PyDough function operator, but it is used internally to represent the basic variance operation. - `STD`: the basic operation for standard deviation, which is used to create the other standard deviation functions with different types of keyword arguments. Note: `STD` is not a valid PyDough function operator, but it is used internally to represent the basic standard deviation operation. -- `SAMPLE_VARIANCE`: returns the sample variance of the values of a plural expression. +- `SAMPLE_VAR`: returns the sample variance of the values of a plural expression. - `SAMPLE_STD`: returns the sample standard deviation of the values of a plural expression. -- `POPULATION_VARIANCE`: returns the population variance of the values of a plural expression. +- `POPULATION_VAR`: returns the population variance of the values of a plural expression. - `POPULATION_STD`: returns the population standard deviation of the values of a plural expression. 
##### Collection Aggregations diff --git a/pydough/pydough_operators/expression_operators/__init__.py b/pydough/pydough_operators/expression_operators/__init__.py index e631b3f9d..97698c6e8 100644 --- a/pydough/pydough_operators/expression_operators/__init__.py +++ b/pydough/pydough_operators/expression_operators/__init__.py @@ -64,7 +64,7 @@ "NOT", "PERCENTILE", "POPULATION_STD", - "POPULATION_VARIANCE", + "POPULATION_VAR", "POW", "POWER", "PRESENT", @@ -81,7 +81,7 @@ "ROUND", "RPAD", "SAMPLE_STD", - "SAMPLE_VARIANCE", + "SAMPLE_VAR", "SECOND", "SIGN", "SLICE", @@ -163,7 +163,7 @@ NOT, PERCENTILE, POPULATION_STD, - POPULATION_VARIANCE, + POPULATION_VAR, POW, POWER, PRESENT, @@ -179,7 +179,7 @@ ROUND, RPAD, SAMPLE_STD, - SAMPLE_VARIANCE, + SAMPLE_VAR, SECOND, SIGN, SLICE, diff --git a/pydough/pydough_operators/expression_operators/registered_expression_operators.py b/pydough/pydough_operators/expression_operators/registered_expression_operators.py index b882a5c23..b03cf5e4f 100644 --- a/pydough/pydough_operators/expression_operators/registered_expression_operators.py +++ b/pydough/pydough_operators/expression_operators/registered_expression_operators.py @@ -58,7 +58,7 @@ "NOT", "PERCENTILE", "POPULATION_STD", - "POPULATION_VARIANCE", + "POPULATION_VAR", "POW", "POWER", "PRESENT", @@ -74,7 +74,7 @@ "ROUND", "RPAD", "SAMPLE_STD", - "SAMPLE_VARIANCE", + "SAMPLE_VAR", "SECOND", "SIGN", "SLICE", @@ -286,8 +286,8 @@ kwarg_defaults={"type": "population"}, ) # Define VAR with keyword branching for "type" which is represented internally. 
-POPULATION_VARIANCE = VAR.with_kwarg("POPULATION_VARIANCE", {"type": "population"}) -SAMPLE_VARIANCE = VAR.with_kwarg("SAMPLE_VARIANCE", {"type": "sample"}) +POPULATION_VAR = VAR.with_kwarg("POPULATION_VAR", {"type": "population"}) +SAMPLE_VAR = VAR.with_kwarg("SAMPLE_VAR", {"type": "sample"}) # Define STD with keyword branching STD = KeywordBranchingExpressionFunctionOperator( diff --git a/pydough/qdag/collections/augmenting_child_operator.py b/pydough/qdag/collections/augmenting_child_operator.py index c5f783026..6f8c1fcdc 100644 --- a/pydough/qdag/collections/augmenting_child_operator.py +++ b/pydough/qdag/collections/augmenting_child_operator.py @@ -87,7 +87,6 @@ def get_term(self, term_name: str) -> PyDoughQDAG: term = Reference(self.preceding_context, term_name) return term - @cache def to_string(self) -> str: return f"{self.preceding_context.to_string()}.{self.standalone_string}" diff --git a/pydough/sqlglot/transform_bindings/base_transform_bindings.py b/pydough/sqlglot/transform_bindings/base_transform_bindings.py index 716eeeccb..8f247ce6f 100644 --- a/pydough/sqlglot/transform_bindings/base_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/base_transform_bindings.py @@ -231,9 +231,9 @@ def convert_call_to_sqlglot( return self.convert_monotonic(args, types) case pydop.SQRT: return self.convert_sqrt(args, types) - case pydop.POPULATION_VARIANCE: + case pydop.POPULATION_VAR: return self.convert_variance(args, types, "population") - case pydop.SAMPLE_VARIANCE: + case pydop.SAMPLE_VAR: return self.convert_variance(args, types, "sample") case pydop.POPULATION_STD: return self.convert_std(args, types, "population") diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 539224e92..5be88dd60 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2965,13 +2965,13 @@ def test_pipeline_e2e_tpch_custom( pytest.param( simple_scan, [], - "Column selection must not be empty", + 
"Expected `columns` argument to be a non-empty list", id="bad_columns_1", ), pytest.param( simple_scan, {}, - "Column selection must not be empty", + "Expected `columns` argument to be a non-empty dictionary", id="bad_columns_2", ), pytest.param( @@ -3000,7 +3000,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_1, None, re.escape( - "Unrecognized term of TPCH.customers: 'c_name'. Did you mean: name, comment, phone?" + "Unrecognized term of TPCH.customers: 'c_name'. Did you mean: name, key, phone?" ), id="bad_name_1", ), @@ -3016,7 +3016,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_3, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(foo=1, bar=2, fizz=3, BUZZ=4): 'fizzbuzz'. Did you mean: fizz, BUZZ, bar?" + "Unrecognized term of TPCH.CALCULATE(foo=1, bar=2, fizz=3, BUZZ=4): 'fizzbuzz'. Did you mean: fizz, BUZZ, foo?" ), id="bad_name_3", ), @@ -3024,7 +3024,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_4, None, re.escape( - "Unrecognized term of TPCH.customers.orders: 'totalPrice'. Did you mean: total_price, clerk, lines?" + "Unrecognized term of TPCH.customers.orders: 'totalPrice'. Did you mean: total_price, clerk, key?" ), id="bad_name_4", ), @@ -3032,7 +3032,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_5, None, re.escape( - "Unrecognized term of TPCH.customers.orders: 'c_name'. Did you mean: clerk, comment, customer, lines, key, order_date?" + "Unrecognized term of TPCH.customers.orders: 'c_name'. Did you mean: key, lines, clerk, comment, customer?" ), id="bad_name_5", ), @@ -3040,7 +3040,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_6, None, re.escape( - "Unrecognized term of TPCH.customers: 'suppliers'. Did you mean: orders, address, phone, comment, key, name, nation?" + "Unrecognized term of TPCH.customers: 'suppliers'. Did you mean: orders, key, name, address, phone, nation?" ), id="bad_name_6", ), @@ -3056,7 +3056,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_8, None, re.escape( - "Unrecognized term of TPCH.customers: 'n123ame'. 
Did you mean: name, nation, phone?" + "Unrecognized term of TPCH.customers: 'n123ame'. Did you mean: name, key, phone?" ), id="bad_name_8", ), @@ -3064,7 +3064,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_9, None, re.escape( - "Unrecognized term of TPCH.customers: '__phone__'. Did you mean: phone, nation, address?" + "Unrecognized term of TPCH.customers: '__phone__'. Did you mean: phone, key, name?" ), id="bad_name_9", ), @@ -3096,7 +3096,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_13, None, re.escape( - "Unrecognized term of TPCH.customers: 'thisisareallylargename_that_exceeds_the_system_limit'. Did you mean: market_segment, account_balance, nation_key, address?" + "Unrecognized term of TPCH.customers: 'thisisareallylargename_that_exceeds_the_system_limit'. Did you mean: market_segment, name, orders, address, key, phone, nation, nation_key?" ), id="bad_name_13", ), @@ -3104,7 +3104,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_14, None, re.escape( - "Unrecognized term of TPCH.customers: 'keyname'. Did you mean: name, key, phone?" + "Unrecognized term of TPCH.customers: 'keyname'. Did you mean: key, name, phone?" ), id="bad_name_14", ), @@ -3112,7 +3112,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_15, None, re.escape( - "Unrecognized term of TPCH.customers: 'namekey'. Did you mean: name, key, nation, nation_key?" + "Unrecognized term of TPCH.customers: 'namekey'. Did you mean: name, key, nation?" ), id="bad_name_15", ), @@ -3120,7 +3120,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_16, None, re.escape( - "Unrecognized term of TPCH.customers: 'no_exist'. Did you mean: comment, name, nation, orders, address, key, phone?" + "Unrecognized term of TPCH.customers: 'no_exist'. Did you mean: name, key, comment, nation, orders, phone, address?" 
), id="bad_name_16", ), @@ -3136,7 +3136,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_18, None, re.escape( - "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'nords'. Did you mean: n_orders, lines, clerk, key, year?" + "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'nords'. Did you mean: n_orders, key, lines, year, clerk?" ), id="bad_name_18", ), @@ -3152,7 +3152,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_20, None, re.escape( - "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'orders'. Did you mean: n_orders, clerk, lines?" + "Unrecognized term of TPCH.Partition(orders.CALCULATE(year=YEAR(order_date)), name='years', by=year).CALCULATE(n_orders=COUNT(orders)).orders: 'orders'. Did you mean: n_orders, clerk, key, lines, year?" ), id="bad_name_20", ), @@ -3168,7 +3168,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_22, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'Over_Intellectual_Ization'. Did you mean: over_intellect_ualiz_ation, OVERIN_tellectualizers, De_Institutionalizations?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'Over_Intellectual_Ization'. Did you mean: over_intellect_ualiz_ation, OVERIN_tellectualizers, PROFESSION_alization?" 
), id="bad_name_22", ), @@ -3176,7 +3176,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_23, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'paio_eo_aliz_ation'. Did you mean: PROFESSION_alization, over_intellect_ualiz_ation, anthro_pomorph_IZATION?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'paio_eo_aliz_ation'. Did you mean: PROFESSION_alization, nations, parts, regions?" ), id="bad_name_23", ), @@ -3184,7 +3184,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_24, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): '_a_r_h_x_n_t_p_o_q__z_m_o_p_i__a_o_n_z_'. Did you mean: anthro_pomorph_IZATION, over_intellect_ualiz_ation, De_Institutionalizations?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): '_a_r_h_x_n_t_p_o_q__z_m_o_p_i__a_o_n_z_'. Did you mean: nations, parts, anthro_pomorph_IZATION, lines, regions?" ), id="bad_name_24", ), @@ -3192,7 +3192,7 @@ def test_pipeline_e2e_tpch_custom( bad_name_25, None, re.escape( - "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'anthropomorphization_and_overintellectualization_and_ultrarevolutionaries'. 
Did you mean: over_intellect_ualiz_ation, OVERIN_tellectualizers, anthro_pomorph_IZATION, ultra_revolution_aries, De_Institutionalizations?" + "Unrecognized term of TPCH.CALCULATE(anthro_pomorph_IZATION=1, counte_rintelligence=2, OVERIN_tellectualizers=3, ultra_revolution_aries=4, PROFESSION_alization=5, De_Institutionalizations=6, over_intellect_ualiz_ation=7): 'anthropomorphization_and_overintellectualization_and_ultrarevolutionaries'. Did you mean: over_intellect_ualiz_ation, anthro_pomorph_IZATION, OVERIN_tellectualizers, ultra_revolution_aries?" ), id="bad_name_25", ), @@ -3226,7 +3226,7 @@ def test_pipeline_e2e_tpch_custom( bad_cross_5, None, re.escape( - "Unrecognized term of TPCH.regions.CALCULATE(name=name).TPCH.regions.CALCULATE(name=name): 'regions'. Did you mean: nations, comment, key?" + "Unrecognized term of TPCH.regions.CALCULATE(name=name).TPCH.regions.CALCULATE(name=name): 'regions'. Did you mean: nations, key, name?" ), id="bad_cross_5", ), @@ -3234,11 +3234,12 @@ def test_pipeline_e2e_tpch_custom( bad_cross_6, None, re.escape( - "Unrecognized term of TPCH.suppliers.TPCH.parts: 'suppliers'. Did you mean: lines, supply_records, container, size, comment, key, name?" + "Unrecognized term of TPCH.suppliers.TPCH.parts: 'suppliers'. Did you mean: size, lines, key, name, supply_records?" ), id="bad_cross_6", ), - # NOTE: raised exception with an empty message + # TODO: fix the error handling here to give a proper error message + # (currently fails in hybrid due to an assertion) pytest.param( bad_cross_7, None, @@ -3249,7 +3250,7 @@ def test_pipeline_e2e_tpch_custom( bad_cross_8, None, re.escape( - "Unrecognized term of TPCH.regions.CALCULATE(r1=name).TPCH.nations: 'r_key'. Did you mean: key, name, r1?" + "Unrecognized term of TPCH.regions.CALCULATE(r1=name).TPCH.nations: 'r_key'. Did you mean: key, r1, name?" 
), id="bad_cross_8", ), @@ -3268,7 +3269,7 @@ def test_pipeline_e2e_tpch_custom( pytest.param( bad_cross_11, None, - "Unrecognized term of TPCH.nations.TPCH.regions: 'customers'. Did you mean: comment, name, nations, key?", + "Unrecognized term of TPCH.nations.TPCH.regions: 'customers'. Did you mean: comment, name, key, nations?", id="bad_cross_11", ), pytest.param( diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index af3b1bbe0..5af72a1ec 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -2,5 +2,5 @@ ROOT(columns=[('name', n_name), ('var', pop_var), ('std', pop_std), ('sample_var JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) FILTER(condition=ISIN(n_name, ['ALGERIA', 'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VARIANCE(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VARIANCE(s_acctbal)}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VAR(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VAR(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/simple_var_std_with_nulls.txt b/tests/test_plan_refsols/simple_var_std_with_nulls.txt index 85f3089c8..a29f26c60 100644 --- a/tests/test_plan_refsols/simple_var_std_with_nulls.txt +++ b/tests/test_plan_refsols/simple_var_std_with_nulls.txt @@ -1,5 +1,5 @@ ROOT(columns=[('var_samp_0_nnull', 
var_samp_0_nnull), ('var_samp_1_nnull', var_samp_1_nnull), ('var_samp_2_nnull', var_samp_2_nnull), ('var_pop_0_nnull', var_pop_0_nnull), ('var_pop_1_nnull', var_pop_1_nnull), ('var_pop_2_nnull', var_pop_2_nnull), ('std_samp_0_nnull', std_samp_0_nnull), ('std_samp_1_nnull', std_samp_1_nnull), ('std_samp_2_nnull', std_samp_2_nnull), ('std_pop_0_nnull', std_pop_0_nnull), ('std_pop_1_nnull', std_pop_1_nnull), ('std_pop_2_nnull', std_pop_2_nnull)], orderings=[]) - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VARIANCE(key_0), 'var_pop_1_nnull': POPULATION_VARIANCE(key_1), 'var_pop_2_nnull': POPULATION_VARIANCE(key_2), 'var_samp_0_nnull': SAMPLE_VARIANCE(key_0), 'var_samp_1_nnull': SAMPLE_VARIANCE(key_1), 'var_samp_2_nnull': SAMPLE_VARIANCE(key_2)}) + AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VAR(key_0), 'var_pop_1_nnull': POPULATION_VAR(key_1), 'var_pop_2_nnull': POPULATION_VAR(key_2), 'var_samp_0_nnull': SAMPLE_VAR(key_0), 'var_samp_1_nnull': SAMPLE_VAR(key_1), 'var_samp_2_nnull': SAMPLE_VAR(key_2)}) PROJECT(columns={'key_0': KEEP_IF(c_acctbal, c_custkey > 3:numeric), 'key_1': KEEP_IF(c_acctbal, c_custkey > 2:numeric), 'key_2': KEEP_IF(c_acctbal, c_custkey > 1:numeric)}) FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_pydough_functions/all_pydough_functions_dialects.py 
b/tests/test_pydough_functions/all_pydough_functions_dialects.py index b5fd4dec7..06419eff3 100644 --- a/tests/test_pydough_functions/all_pydough_functions_dialects.py +++ b/tests/test_pydough_functions/all_pydough_functions_dialects.py @@ -186,8 +186,10 @@ def aggregation_functions(): anything_value=ANYTHING(customers.account_balance), count_value=COUNT(customers.account_balance), count_distinct_value=NDISTINCT(customers.account_balance), - variance_value=VAR(customers.account_balance, type="sample"), - stddev_value=STD(customers.account_balance, type="sample"), + variance_s_value=VAR(customers.account_balance, type="sample"), + variance_p_value=VAR(customers.account_balance, type="population"), + stddev_s_value=STD(customers.account_balance, type="sample"), + stddev_p_value=STD(customers.account_balance, type="population"), ).WHERE(HAS(customers) & HASNOT(customers.orders)) diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index de266738c..da16a0086 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -126,8 +126,8 @@ id="kwargfunc_1", ), pytest.param( - "result = nations.CALCULATE(name=name, var=SAMPLE_VARIANCE(suppliers.account_balance))", - "PyDough object SAMPLE_VARIANCE is not callable. Did you mean: SLICE, REPLACE, MEDIAN, SIGN, STRING, YEAR, ISIN, MIN, STRIP, SUM, UPPER, VAR?", + "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", + "PyDough object SAMPLE_VAR is not callable. 
Did you mean: YEAR, SUM, UPPER, VAR, AVG, LPAD, PREV, RPAD, DAY, FLOAT, FLOOR, HAS, LOWER, MAX, POWER, SLICE, SMALLEST, SQRT, STD, ABS, CEIL, GETPART, HOUR, LIKE, MEDIAN, NEXT, QUARTER, RELAVG, REPLACE, SECOND, SIGN, STRIP?", id="kwargfunc_2", ), pytest.param( diff --git a/tests/test_sql_refsols/aggregation_functions_ansi.sql b/tests/test_sql_refsols/aggregation_functions_ansi.sql index dd83c4df2..96a109798 100644 --- a/tests/test_sql_refsols/aggregation_functions_ansi.sql +++ b/tests/test_sql_refsols/aggregation_functions_ansi.sql @@ -16,8 +16,10 @@ WITH _s1 AS ( MEDIAN(customer.c_acctbal) AS median_c_acctbal, MIN(customer.c_acctbal) AS min_c_acctbal, COUNT(DISTINCT customer.c_acctbal) AS ndistinct_c_acctbal, + STDDEV_POP(customer.c_acctbal) AS population_std_c_acctbal, + VARIANCE_POP(customer.c_acctbal) AS population_var_c_acctbal, STDDEV(customer.c_acctbal) AS sample_std_c_acctbal, - VARIANCE(customer.c_acctbal) AS sample_variance_c_acctbal, + VARIANCE(customer.c_acctbal) AS sample_var_c_acctbal, SUM(customer.c_acctbal) AS sum_c_acctbal, SUM(_s1.n_rows) AS sum_n_rows, customer.c_nationkey @@ -37,8 +39,10 @@ SELECT _t1.anything_c_acctbal AS anything_value, _t1.count_c_acctbal AS count_value, _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value + _t1.sample_var_c_acctbal AS variance_s_value, + _t1.population_var_c_acctbal AS variance_p_value, + _t1.sample_std_c_acctbal AS stddev_s_value, + _t1.population_std_c_acctbal AS stddev_p_value FROM tpch.nation AS nation JOIN _t1 AS _t1 ON _t1.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/aggregation_functions_sqlite.sql b/tests/test_sql_refsols/aggregation_functions_sqlite.sql index c8aaa7986..c2f829e61 100644 --- a/tests/test_sql_refsols/aggregation_functions_sqlite.sql +++ b/tests/test_sql_refsols/aggregation_functions_sqlite.sql @@ -19,12 +19,12 @@ WITH _s1 AS ( ) < 1.0 THEN customer.c_acctbal ELSE NULL - 
END AS expr_15, + END AS expr_17, CASE WHEN CAST(0.19999999999999996 * COUNT(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) AS INTEGER) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC) THEN customer.c_acctbal ELSE NULL - END AS expr_16, + END AS expr_18, customer.c_acctbal, customer.c_nationkey, _s1.n_rows @@ -35,12 +35,35 @@ WITH _s1 AS ( SELECT MAX(c_acctbal) AS anything_c_acctbal, AVG(c_acctbal) AS avg_c_acctbal, - AVG(expr_15) AS avg_expr_15, + AVG(expr_17) AS avg_expr_17, COUNT(c_acctbal) AS count_c_acctbal, MAX(c_acctbal) AS max_c_acctbal, - MAX(expr_16) AS max_expr_16, + MAX(expr_18) AS max_expr_18, MIN(c_acctbal) AS min_c_acctbal, COUNT(DISTINCT c_acctbal) AS ndistinct_c_acctbal, + POWER( + ( + CAST(( + SUM(( + POWER(c_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(c_acctbal), 2) + ) AS REAL) / COUNT(c_acctbal) + ) + ) AS REAL) / COUNT(c_acctbal) + ), + 0.5 + ) AS population_std_c_acctbal, + CAST(( + SUM(( + POWER(c_acctbal, 2) + )) - ( + CAST(( + POWER(SUM(c_acctbal), 2) + ) AS REAL) / COUNT(c_acctbal) + ) + ) AS REAL) / COUNT(c_acctbal) AS population_var_c_acctbal, POWER( ( CAST(( @@ -67,7 +90,7 @@ WITH _s1 AS ( ) ) AS REAL) / ( COUNT(c_acctbal) - 1 - ) AS sample_variance_c_acctbal, + ) AS sample_var_c_acctbal, SUM(c_acctbal) AS sum_c_acctbal, SUM(n_rows) AS sum_n_rows, c_nationkey @@ -78,15 +101,17 @@ WITH _s1 AS ( SELECT COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, _t1.avg_c_acctbal AS avg_value, - _t1.avg_expr_15 AS median_value, + _t1.avg_expr_17 AS median_value, _t1.min_c_acctbal AS min_value, _t1.max_c_acctbal AS max_value, - _t1.max_expr_16 AS quantile_value, + _t1.max_expr_18 AS quantile_value, _t1.anything_c_acctbal AS anything_value, _t1.count_c_acctbal AS count_value, _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value + _t1.sample_var_c_acctbal AS variance_s_value, + _t1.population_var_c_acctbal AS 
variance_p_value, + _t1.sample_std_c_acctbal AS stddev_s_value, + _t1.population_std_c_acctbal AS stddev_p_value FROM tpch.nation AS nation JOIN _t1 AS _t1 ON _t1.c_nationkey = nation.n_nationkey From b94e76dc171281c3070590446908d799973fd157 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 14:30:23 -0400 Subject: [PATCH 56/97] WIP --- tests/test_pipeline_tpch_udf.py | 31 ++++++++++--------- .../bad_pydough_functions.py | 20 ------------ tests/testing_utilities.py | 6 ++-- 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/tests/test_pipeline_tpch_udf.py b/tests/test_pipeline_tpch_udf.py index 42ec6c57d..002363cbf 100644 --- a/tests/test_pipeline_tpch_udf.py +++ b/tests/test_pipeline_tpch_udf.py @@ -11,13 +11,6 @@ from pydough.database_connectors import DatabaseContext, DatabaseDialect from pydough.metadata import GraphMetadata -from pydough.unqualified import UnqualifiedNode -from tests.test_pydough_functions.bad_pydough_functions import ( - bad_sqlite_udf_1, - bad_sqlite_udf_2, - bad_sqlite_udf_3, - bad_sqlite_udf_4, -) from tests.test_pydough_functions.udf_pydough_functions import ( sqlite_udf_combine_strings, sqlite_udf_count_epsilon, @@ -412,32 +405,42 @@ def test_pipeline_e2e_tpch_sqlite_udf( @pytest.mark.parametrize( - "pydough_impl, error_message", + "pydough_text, error_message", [ pytest.param( - bad_sqlite_udf_1, + # Calling a UDF that requires 2 arguments with only 1 argument + "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y'))", "Invalid operator invocation \"FORMAT_DATETIME('%Y')\": Expected 2 arguments, received 1", id="bad_sqlite_udf_1", ), pytest.param( - bad_sqlite_udf_2, + # Calling a UDF that requires 2 arguments with 3 arguments + "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y' order_date, 'foo'))", "Invalid operator invocation \"FORMAT_DATETIME('%Y', order_date, 'foo')\": Expected 2 arguments, received 3", id="bad_sqlite_udf_2", ), pytest.param( - bad_sqlite_udf_3, + # Calling a UDF that requires 1-2 arguments 
with 0 arguments + "result = nations.CALCULATE(x=GCAT(by=name.ASC()))", "Invalid operator invocation 'GCAT()': Expected between 1 and 2 arguments inclusive, received 0", id="bad_sqlite_udf_3", ), pytest.param( - bad_sqlite_udf_4, + # Calling a UDF that requires 1-2 arguments with 3 arguments + "result = nations.CALCULATE(x=GCAT(name, ';', 'bar', by=name.ASC()))", "Invalid operator invocation \"GCAT(name, ';', 'bar')\": Expected between 1 and 2 arguments inclusive, received 3.", id="bad_sqlite_udf_4", ), + pytest.param( + # Calling a UDF function that doesn't exist + "result = order.CALCULATE(x=fmtdate('%Y', order_date))", + "Invalid operator invocation \"GCAT(name, ';', 'bar')\": Expected between 1 and 2 arguments inclusive, received 3.", + id="bad_sqlite_udf_5", + ), ], ) def test_pipeline_tpch_sqlite_udf_errors( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_text: str, error_message: str, get_udf_graph: graph_fetcher, ): @@ -446,7 +449,7 @@ def test_pipeline_tpch_sqlite_udf_errors( """ graph: GraphMetadata = get_udf_graph("TPCH_SQLITE_UDFS") run_e2e_error_test( - pydough_impl, + pydough_text, re.escape(error_message), graph, ) diff --git a/tests/test_pydough_functions/bad_pydough_functions.py b/tests/test_pydough_functions/bad_pydough_functions.py index fe6cb7f65..f019fb8c2 100644 --- a/tests/test_pydough_functions/bad_pydough_functions.py +++ b/tests/test_pydough_functions/bad_pydough_functions.py @@ -483,26 +483,6 @@ def bad_name_25(): ) -def bad_sqlite_udf_1(): - # Calling a UDF that requires 2 arguments with only 1 argument - return orders.CALCULATE(x=FORMAT_DATETIME("%Y")) - - -def bad_sqlite_udf_2(): - # Calling a UDF that requires 2 arguments with 3 arguments - return orders.CALCULATE(x=FORMAT_DATETIME("%Y", order_date, "foo")) - - -def bad_sqlite_udf_3(): - # Calling a UDF that requires 1-2 arguments with 0 arguments - return nations.CALCULATE(x=GCAT(by=name.ASC())) - - -def bad_sqlite_udf_4(): - # Calling a UDF that requires 1-2 arguments with 
3 arguments - return nations.CALCULATE(x=GCAT(name, ";", "bar", by=name.ASC())) - - # TEST for CROSS def bad_cross_1(): # Reason it is bad: Using `CROSS` with a not a collection diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index ac2274bcd..fd78c30db 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1226,7 +1226,7 @@ def run_e2e_test( def run_e2e_error_test( - pydough_impl: Callable[[], UnqualifiedNode], + pydough_impl: Callable[[], UnqualifiedNode] | str, error_message: str, graph: GraphMetadata, columns: dict[str, str] | list[str] | None = None, @@ -1239,7 +1239,8 @@ def run_e2e_error_test( provided `error_message`. Args: - `pydough_impl`: The PyDough function to be tested. + `pydough_impl`: The PyDough function to be tested, or the string that + should be evaluated to obtain the PyDough code. `error_message`: The error message that is expected to be raised. `graph`: The metadata graph to use for the test. `columns`: The columns argument to use for the test, if any. @@ -1247,6 +1248,7 @@ def run_e2e_error_test( `config`: The PyDough configuration to use for the test, if any. 
""" with pytest.raises(Exception, match=error_message): + assert not isinstance(pydough_impl, str) root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph) call_kwargs: dict = {} if graph is not None: From 19a0fb84d750fb5843c70984d2bb1d6b73c26576 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 14:35:53 -0400 Subject: [PATCH 57/97] Resolving conflicts and fixing UDF tests --- tests/test_pipeline_tpch_udf.py | 6 +++--- tests/test_plan_refsols/simple_var_std_with_nulls.txt | 9 +-------- tests/test_sql_refsols/defog_broker_adv15_ansi.sql | 4 +++- tests/test_sql_refsols/defog_broker_adv15_sqlite.sql | 4 +++- tests/testing_utilities.py | 3 --- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/test_pipeline_tpch_udf.py b/tests/test_pipeline_tpch_udf.py index 002363cbf..cb58bf775 100644 --- a/tests/test_pipeline_tpch_udf.py +++ b/tests/test_pipeline_tpch_udf.py @@ -415,7 +415,7 @@ def test_pipeline_e2e_tpch_sqlite_udf( ), pytest.param( # Calling a UDF that requires 2 arguments with 3 arguments - "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y' order_date, 'foo'))", + "result = orders.CALCULATE(x=FORMAT_DATETIME('%Y', order_date, 'foo'))", "Invalid operator invocation \"FORMAT_DATETIME('%Y', order_date, 'foo')\": Expected 2 arguments, received 3", id="bad_sqlite_udf_2", ), @@ -433,8 +433,8 @@ def test_pipeline_e2e_tpch_sqlite_udf( ), pytest.param( # Calling a UDF function that doesn't exist - "result = order.CALCULATE(x=fmtdate('%Y', order_date))", - "Invalid operator invocation \"GCAT(name, ';', 'bar')\": Expected between 1 and 2 arguments inclusive, received 3.", + "result = order.CALCULATE(x=FORMATDATETIME('%Y', order_date))", + "PyDough object FORMATDATETIME is not callable. 
Did you mean: FORMAT_DATETIME, DATETIME, FLOAT?", id="bad_sqlite_udf_5", ), ], diff --git a/tests/test_plan_refsols/simple_var_std_with_nulls.txt b/tests/test_plan_refsols/simple_var_std_with_nulls.txt index 811417dab..620efe547 100644 --- a/tests/test_plan_refsols/simple_var_std_with_nulls.txt +++ b/tests/test_plan_refsols/simple_var_std_with_nulls.txt @@ -1,11 +1,4 @@ ROOT(columns=[('var_samp_0_nnull', var_samp_0_nnull), ('var_samp_1_nnull', var_samp_1_nnull), ('var_samp_2_nnull', var_samp_2_nnull), ('var_pop_0_nnull', var_pop_0_nnull), ('var_pop_1_nnull', var_pop_1_nnull), ('var_pop_2_nnull', var_pop_2_nnull), ('std_samp_0_nnull', std_samp_0_nnull), ('std_samp_1_nnull', std_samp_1_nnull), ('std_samp_2_nnull', std_samp_2_nnull), ('std_pop_0_nnull', std_pop_0_nnull), ('std_pop_1_nnull', std_pop_1_nnull), ('std_pop_2_nnull', std_pop_2_nnull)], orderings=[]) -<<<<<<< HEAD - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(key_0), 'std_pop_1_nnull': POPULATION_STD(key_1), 'std_pop_2_nnull': POPULATION_STD(key_2), 'std_samp_0_nnull': SAMPLE_STD(key_0), 'std_samp_1_nnull': SAMPLE_STD(key_1), 'std_samp_2_nnull': SAMPLE_STD(key_2), 'var_pop_0_nnull': POPULATION_VAR(key_0), 'var_pop_1_nnull': POPULATION_VAR(key_1), 'var_pop_2_nnull': POPULATION_VAR(key_2), 'var_samp_0_nnull': SAMPLE_VAR(key_0), 'var_samp_1_nnull': SAMPLE_VAR(key_1), 'var_samp_2_nnull': SAMPLE_VAR(key_2)}) - PROJECT(columns={'key_0': KEEP_IF(c_acctbal, c_custkey > 3:numeric), 'key_1': KEEP_IF(c_acctbal, c_custkey > 2:numeric), 'key_2': KEEP_IF(c_acctbal, c_custkey > 1:numeric)}) - FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) -======= - AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_pop_1_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 
'std_pop_2_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'std_samp_0_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_samp_1_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_samp_2_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_pop_0_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_pop_1_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_pop_2_nnull': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_samp_0_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_samp_1_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_samp_2_nnull': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_custkey > 1:numeric))}) + AGGREGATE(keys={}, aggregations={'std_pop_0_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_pop_1_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_pop_2_nnull': POPULATION_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'std_samp_0_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'std_samp_1_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'std_samp_2_nnull': SAMPLE_STD(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_pop_0_nnull': POPULATION_VAR(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_pop_1_nnull': POPULATION_VAR(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_pop_2_nnull': POPULATION_VAR(KEEP_IF(c_acctbal, c_custkey > 1:numeric)), 'var_samp_0_nnull': SAMPLE_VAR(KEEP_IF(c_acctbal, c_custkey > 3:numeric)), 'var_samp_1_nnull': SAMPLE_VAR(KEEP_IF(c_acctbal, c_custkey > 2:numeric)), 'var_samp_2_nnull': SAMPLE_VAR(KEEP_IF(c_acctbal, c_custkey > 1:numeric))}) FILTER(condition=ISIN(c_custkey, [1, 2, 3]:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) ->>>>>>> kian/pagerank diff --git 
a/tests/test_sql_refsols/defog_broker_adv15_ansi.sql b/tests/test_sql_refsols/defog_broker_adv15_ansi.sql index 7ada2b05b..865bf4ed4 100644 --- a/tests/test_sql_refsols/defog_broker_adv15_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv15_ansi.sql @@ -1,6 +1,8 @@ SELECT sbcustcountry AS country, - 100 * COALESCE(COALESCE(SUM(sbcuststatus = 'active'), 0) / COUNT(*), 0.0) AS ar + 100 * ( + COALESCE(SUM(sbcuststatus = 'active'), 0) / COUNT(*) + ) AS ar FROM main.sbcustomer WHERE sbcustjoindate <= '2022-12-31' AND sbcustjoindate >= '2022-01-01' diff --git a/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql index e2663ce7b..27a090ce1 100644 --- a/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv15_sqlite.sql @@ -1,6 +1,8 @@ SELECT sbcustcountry AS country, - 100 * COALESCE(CAST(COALESCE(SUM(sbcuststatus = 'active'), 0) AS REAL) / COUNT(*), 0.0) AS ar + 100 * ( + CAST(COALESCE(SUM(sbcuststatus = 'active'), 0) AS REAL) / COUNT(*) + ) AS ar FROM main.sbcustomer WHERE sbcustjoindate <= '2022-12-31' AND sbcustjoindate >= '2022-01-01' diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 887d386e3..321286516 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1297,9 +1297,6 @@ def run_e2e_error_test( `config`: The PyDough configuration to use for the test, if any. 
""" with pytest.raises(Exception, match=error_message): - assert not isinstance(pydough_impl, str), ( - "Expected pydough_impl to be a callable, not a string" - ) root: UnqualifiedNode = transform_and_exec_pydough(pydough_impl, graph, None) call_kwargs: dict = {} if graph is not None: From bf69fe8f2a072b1b27d57f614deaa1449e8fbed0 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 15:07:13 -0400 Subject: [PATCH 58/97] Moved window errors --- pydough/errors/pydough_error_builder.py | 54 ++++++++++++- pydough/unqualified/qualification.py | 101 +++++++++++++----------- tests/test_qualification_errors.py | 19 +++-- 3 files changed, 121 insertions(+), 53 deletions(-) diff --git a/pydough/errors/pydough_error_builder.py b/pydough/errors/pydough_error_builder.py index 9582f0295..12ec8bb5b 100644 --- a/pydough/errors/pydough_error_builder.py +++ b/pydough/errors/pydough_error_builder.py @@ -16,7 +16,7 @@ from pydough.pydough_operators import PyDoughOperator from pydough.qdag import PyDoughCollectionQDAG, PyDoughExpressionQDAG from pydough.relational import CallExpression - from pydough.unqualified import UnqualifiedNode + from pydough.unqualified import UnqualifiedNode, UnqualifiedWindow class PyDoughErrorBuilder: @@ -290,3 +290,55 @@ def undefined_function_call( else: error_message += " Did you mean to use a function?" return PyDoughUnqualifiedException(error_message) + + def bad_window_per( + self, + per: str, + ancestral_names: list[str], + context: "PyDoughCollectionQDAG", + window: "UnqualifiedWindow", + ) -> PyDoughException: + """ + Creates an exception for when the `per` string in a window is malformed. + + Args: + `per`: The per string that caused the error. + `ancestral_names`: The names of the ancestors in the context. + `context`: The collection context where the error occurred. + `window`: The unqualified window that contains the per string. + + Returns: + An exception indicating the malformed per string. 
+ """ + ancestor_name: str + ancestor_idx: int | None = None + msg: str | None = None + components: list[str] = per.split(":") + + # Extract the name/idx components of `per=name:idx`, identifying an + # error if not in that format. + if len(components) <= 2: + if len(components) == 1: + ancestor_name = components[0] + ancestor_idx = None + elif len(components) == 2: + ancestor_name = components[0] + if not components[1].isdigit() or int(components[1]) <= 0: + msg = "expected the index after ':' to be a positive integer" + else: + ancestor_idx = int(components[1]) + # If an error was not found yet, figure out what is wrong with + # `name` or `idx`. + if msg is None: + if ancestor_name not in ancestral_names: + msg = f"unrecognized ancestor {ancestor_name!r}" + elif ancestor_idx is None and ancestral_names.count(ancestor_name) > 1: + msg = f"per-string {ancestor_name!r} is ambiguous in this context; use the form '{ancestor_name}:index' to disambiguate, where '{ancestor_name}:1' refers to the most recent ancestor" + else: + msg = f"there are not {ancestor_idx} ancestors of the current context with name {ancestor_name!r}" + else: + msg = f"expected 0 or 1 ':', found {len(components) - 1})" + + return PyDoughUnqualifiedException( + f"Error while parsing 'per' string of {window} in context {context} ({msg})" + ) diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index 82783d49f..57b125e32 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -208,6 +208,59 @@ def qualify_binary_operation( operator, [qualified_lhs, qualified_rhs] ) + def extract_window_per_args( + self, + per: str, + ancestral_names: list[str], + context: PyDoughCollectionQDAG, + window: UnqualifiedWindow, + ) -> tuple[str, int | None]: + ancestor_name: str + ancestor_idx: int | None + # Break down the per string into its components, which is either + # `[name]`, or `[name, index]`, where `index` must be a positive + # 
integer. + components: list[str] = per.split(":") + if len(components) == 1: + ancestor_name = components[0] + ancestor_idx = None + elif len(components) == 2: + ancestor_name = components[0] + if not components[1].isdigit(): + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + ancestor_idx = int(components[1]) + if ancestor_idx <= 0: + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + else: + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + # Verify that `name` corresponds to one of the ancestors of the + # current context. + if ancestor_name not in ancestral_names: + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + # Verify that `name` is only present exactly one time in the + # ancestors of the current context, unless an index was provided. + if ancestor_idx is None: + if ancestral_names.count(ancestor_name) > 1: + # TODO: potentially add a default value of 1? + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + elif ancestral_names.count(ancestor_name) < ancestor_idx: + # If an index was provided, ensure that there are that many + # ancestors with that name. + raise pydough.active_session.error_builder.bad_window_per( + per, ancestral_names, context, window + ) + return ancestor_name, ancestor_idx + def qualify_window( self, unqualified: UnqualifiedWindow, @@ -259,51 +312,9 @@ def qualify_window( # the number of ancestor levels to go up to). if per is not None: ancestral_names: list[str] = context.get_ancestral_names() - ancestor_name: str - ancestor_idx: int | None - # Break down the per string into its components, which is either - # `[name]`, or `[name, index]`, where `index` must be a positive - # integer. 
- components: list[str] = per.split(":") - if len(components) == 1: - ancestor_name = components[0] - ancestor_idx = None - elif len(components) == 2: - ancestor_name = components[0] - if not components[1].isdigit(): - raise PyDoughUnqualifiedException( - f"Malformed per string: {per!r} (expected the index after ':' to be a positive integer)" - ) - ancestor_idx = int(components[1]) - if ancestor_idx <= 0: - raise PyDoughUnqualifiedException( - f"Malformed per string: {per!r} (expected the index after ':' to be a positive integer)" - ) - else: - raise PyDoughUnqualifiedException( - f"Malformed per string: {per!r} (expected 0 or 1 ':', found {len(components) - 1})" - ) - # Verify that `name` corresponds to one of the ancestors of the - # current context. - if ancestor_name not in ancestral_names: - raise PyDoughUnqualifiedException( - f"Per string refers to unrecognized ancestor {ancestor_name!r} of {context!r}" - ) - # Verify that `name` is only present exactly one time in the - # ancestors of the current context, unless an index was provided. - if ancestor_idx is None: - if ancestral_names.count(ancestor_name) > 1: - # TODO: potentially add a default value of 1? - raise PyDoughUnqualifiedException( - f"Per string {per!r} is ambiguous for {context!r}. Use the form '{per}:index' to disambiguate, where '{per}:1' refers to the most recent ancestor." - ) - elif ancestral_names.count(ancestor_name) < ancestor_idx: - # If an index was provided, ensure that there are that many - # ancestors with that name. - raise PyDoughUnqualifiedException( - f"Per string {per!r} invalid as there are not {ancestor_idx} ancestors of the current context with name {ancestor_name!r}." - ) - + ancestor_name, ancestor_idx = self.extract_window_per_args( + per, ancestral_names, context, unqualified + ) # Find how many levels upward need to be traversed to find the # targeted ancestor by finding the nth ancestor matching the # name, at the end of the ancestral_names. 
diff --git a/tests/test_qualification_errors.py b/tests/test_qualification_errors.py index da16a0086..6f1726c66 100644 --- a/tests/test_qualification_errors.py +++ b/tests/test_qualification_errors.py @@ -87,39 +87,44 @@ ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='custs'))", - "Per string refers to unrecognized ancestor 'custs' of TPCH.customers.orders", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='custs') in context TPCH.customers.orders (unrecognized ancestor 'custs')", id="bad_per_1", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:2'))", - "Per string 'customers:2' invalid as there are not 2 ancestors of the current context with name 'customers'.", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:2') in context TPCH.customers.orders (there are not 2 ancestors of the current context with name 'customers')", id="bad_per_2", ), pytest.param( "result = customers.orders.customer.orders.lines.CALCULATE(RANKING(by=extended_price.DESC(), per='orders'))", - "Per string 'orders' is ambiguous for TPCH.customers.orders.customer.orders.lines. 
Use the form 'orders:index' to disambiguate, where 'orders:1' refers to the most recent ancestor.", + "Error while parsing 'per' string of RANKING(by=(extended_price.DESC(na_pos='last'), per='orders') in context TPCH.customers.orders.customer.orders.lines (per-string 'orders' is ambiguous in this context; use the form 'orders:index' to disambiguate, where 'orders:1' refers to the most recent ancestor)", id="bad_per_3", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:k'))", - "Malformed per string: 'customers:k' (expected the index after ':' to be a positive integer)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:k') in context TPCH.customers.orders (expected the index after ':' to be a positive integer)", id="bad_per_4", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:1:2'))", - "Malformed per string: 'customers:1:2' (expected 0 or 1 ':', found 2)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:1:2') in context TPCH.customers.orders (expected 0 or 1 ':', found 2))", id="bad_per_5", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:'))", - "Malformed per string: 'customers:' (expected the index after ':' to be a positive integer)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:') in context TPCH.customers.orders (expected the index after ':' to be a positive integer)", id="bad_per_6", ), pytest.param( "result = customers.orders.CALCULATE(RANKING(by=key.ASC(), per='customers:0'))", - "Malformed per string: 'customers:0' (expected the index after ':' to be a positive integer)", + "Error while parsing 'per' string of RANKING(by=(key.ASC(na_pos='first'), per='customers:0') in context TPCH.customers.orders (expected the index after ':' to be a positive integer)", id="bad_per_7", ), + pytest.param( + "result = 
customers.orders.CALCULATE(RANKING(by=key.ASC(), per=-1))", + "`per` argument must be a string", + id="bad_per_8", + ), pytest.param( "result = nations.CALCULATE(name=name, var=SAMPLE_VAR(suppliers.account_balance))", "PyDough object SAMPLE_VAR is not callable. Did you mean: YEAR, SUM, UPPER, VAR, AVG, LPAD, PREV, RPAD, DAY, FLOAT, FLOOR, HAS, LOWER, MAX, POWER, SLICE, SMALLEST, SQRT, STD, ABS, CEIL, GETPART, HOUR, LIKE, MEDIAN, NEXT, QUARTER, RELAVG, REPLACE, SECOND, SIGN, STRIP?", From c5fdfef1fb23ff5092ef362ca0df4b2c641dfdb9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 17:07:37 -0400 Subject: [PATCH 59/97] Updating helper [RUN CI] --- pydough/conversion/projection_pullup.py | 60 +++---------------- .../agg_simplification_1.txt | 5 +- tests/test_plan_refsols/common_prefix_al.txt | 23 ++++--- tests/test_plan_refsols/common_prefix_am.txt | 21 ++++--- tests/test_plan_refsols/common_prefix_n.txt | 49 ++++++++------- tests/test_plan_refsols/common_prefix_o.txt | 53 ++++++++-------- tests/test_plan_refsols/common_prefix_s.txt | 31 +++++----- tests/test_plan_refsols/correl_24.txt | 13 ++-- .../month_year_sliding_windows.txt | 4 +- .../mostly_positive_accounts_per_nation3.txt | 21 ++++--- .../multi_partition_access_5.txt | 34 +++++------ tests/test_plan_refsols/part_reduced_size.txt | 9 ++- tests/test_plan_refsols/simple_cross_5.txt | 4 +- tests/test_plan_refsols/singular7.txt | 4 +- .../test_plan_refsols/supplier_best_part.txt | 21 ++++--- .../technograph_monthly_incident_rate.txt | 11 ++-- ..._year_cumulative_incident_rate_overall.txt | 25 ++++---- tests/test_plan_refsols/tpch_q11.txt | 35 ++++++----- tests/test_plan_refsols/tpch_q18.txt | 17 +++--- tests/test_plan_refsols/tpch_q22.txt | 21 ++++--- .../defog_dealership_gen4_ansi.sql | 4 +- .../defog_dealership_gen4_sqlite.sql | 4 +- ...technograph_monthly_incident_rate_ansi.sql | 46 +++++++------- ...chnograph_monthly_incident_rate_sqlite.sql | 46 +++++++------- 
..._cumulative_incident_rate_overall_ansi.sql | 4 +- ...umulative_incident_rate_overall_sqlite.sql | 4 +- tests/test_sql_refsols/tpch_q11_ansi.sql | 2 +- tests/test_sql_refsols/tpch_q11_sqlite.sql | 2 +- tests/test_sql_refsols/tpch_q18_ansi.sql | 12 ++-- tests/test_sql_refsols/tpch_q18_sqlite.sql | 12 ++-- 30 files changed, 267 insertions(+), 330 deletions(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 356acca41..a2c891a1b 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -29,9 +29,6 @@ contains_window, transpose_expression, ) -from pydough.relational.relational_expressions.column_reference_finder import ( - ColumnReferenceFinder, -) from pydough.types import BooleanType, NumericType from .merge_projects import merge_adjacent_projects @@ -153,8 +150,6 @@ def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: def pull_project_helper( - output_columns: dict[str, RelationalExpression], - used_columns: set[RelationalExpression], project: Project, input_name: str | None, ) -> dict[RelationalExpression, RelationalExpression]: @@ -166,11 +161,6 @@ def pull_project_helper( to substitute the columns in the parent node's output columns or conditions. Args: - `output_columns`: The columns of the parent node that the expressions - from the project node can be pulled into. - `used_columns`: The set of expressions indicating invocations of the - columns from the project in the parent node, e.g. as a filter - or join condition, limit ordering, or aggregation key. `project`: The Project node to pull columns from. `input_name`: The name of the input to the parent node that the project node is connected to. This is used to add input names to the @@ -180,8 +170,7 @@ def pull_project_helper( A mapping of expressions that can be used to substitute the columns in the parent node's output columns or conditions. 
This mapping will ensure columns are only pulled up if they do not contain window - functions, and they are not simultaneously used in the parent's output - while also being used in the condition or orderings. + functions. """ # Ensure every column in the project's inputs is also present in the output # columns of the project. This will ensure that any function calls that are @@ -191,36 +180,15 @@ def pull_project_helper( widen_columns(project) ) - # Identify which columns from the project node are used in the condition - # or orderings, versus those used in the output columns of the parent. - finder: ColumnReferenceFinder = ColumnReferenceFinder() - - # First, the columns used in the output columns of the parent. - finder.reset() - for expr in output_columns.values(): - expr.accept(finder) - output_cols: set[ColumnReference] = finder.get_column_references() - output_names: set[str] = {col.name for col in output_cols} - - # Next the columns that are utilized by the node. - finder.reset() - for expr in used_columns: - expr.accept(finder) - used_cols: set[ColumnReference] = finder.get_column_references() - used_names: set[str] = {col.name for col in used_cols} - # Iterate through the columns of the project to see which ones can be - # pulled up into the parent's output columns vs condition/orderings, - # adding them to a substitutions mapping that will be used to apply the - # transformations. + # pulled up into the parent, adding them to a substitutions mapping that + # will be used to apply the transformations. 
substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): new_expr: RelationalExpression = add_input_name( apply_substitution(expr, transfer_substitutions, {}), input_name ) - if (not contains_window(new_expr)) and ( - (name in used_names) != (name in output_names) - ): + if not contains_window(new_expr): ref_expr: ColumnReference = ColumnReference( name, expr.data_type, input_name=input_name ) @@ -251,12 +219,7 @@ def pull_project_into_join(node: Join, input_index: int) -> None: # columns or condition, and modifies the project node in-place to ensure # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper( - node.columns, - {node.condition}, - project, - node.default_input_aliases[input_index], - ) + pull_project_helper(project, node.default_input_aliases[input_index]) ) # Apply the substitutions to the join's condition and output columns. @@ -286,7 +249,7 @@ def pull_project_into_filter(node: Filter) -> None: # columns or condition, and modifies the project node in-place to ensure # every column in the project's inputs is available to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper(node.columns, {node.condition}, node.input, None) + pull_project_helper(node.input, None) ) # Apply the substitutions to the filter's condition and output columns. @@ -316,12 +279,7 @@ def pull_project_into_limit(node: Limit) -> None: # columns or orderings, and modifies the project node in-place to ensure # every column in the project's inputs is available to the current node. 
substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper( - node.columns, - {order_expr.expr for order_expr in node.orderings}, - node.input, - None, - ) + pull_project_helper(node.input, None) ) # Apply the substitutions to the limit's orderings and output columns. @@ -533,9 +491,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: # node in-place to ensure every column in the project's inputs is available # to the current node. substitutions: dict[RelationalExpression, RelationalExpression] = ( - pull_project_helper( - dict(node.aggregations.items()), set(node.keys.values()), node.input, None - ) + pull_project_helper(node.input, None) ) # Build up the columns of a new project that points to all of the output diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt index 014c1dbdf..13689d041 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,4 +1,3 @@ ROOT(columns=[('aug_exchange', aug_exchange), ('su1', DEFAULT_TO(count_one, 0:numeric)), ('su2', DEFAULT_TO(count_one * 2:numeric, 0:numeric)), ('su3', DEFAULT_TO(count_one * -1:numeric, 0:numeric)), ('su4', DEFAULT_TO(count_one * -3:numeric, 0:numeric)), ('su5', DEFAULT_TO(0:numeric, 0:numeric)), ('su6', DEFAULT_TO(count_one * 0.5:numeric, 0:numeric)), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), 
('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) - AGGREGATE(keys={'aug_exchange': aug_exchange}, aggregations={'agg_63': QUANTILE(aug_exchange, 0.8:numeric), 'count_one': COUNT()}) - PROJECT(columns={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}) - SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) + AGGREGATE(keys={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}, aggregations={'agg_63': QUANTILE(LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string)), 0.8:numeric), 'count_one': COUNT()}) + SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index d5af6cd67..d9d5391ca 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_custkey):asc_first]) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t0.n_rows}) - LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows_1, 0:numeric))], orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 
'n_rows_1': t1.n_rows}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index f42e5981e..5cd9dcaf3 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,14 +1,13 @@ -ROOT(columns=[('cust_key', c_custkey), ('n_orders', n_orders), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_orders': t0.n_orders, 'n_rows': t1.n_rows}) - LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_orders': n_orders}, orderings=[(c_custkey):asc_first]) - FILTER(condition=n_orders > RELAVG(args=[n_orders], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_orders': n_orders}) - PROJECT(columns={'c_custkey': c_custkey, 'c_nationkey': 
c_nationkey, 'n_orders': DEFAULT_TO(n_rows, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) +ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=sum_agg_3 > 0:numeric, columns={'n_rows': n_rows, 
'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_agg_3': SUM(agg_3)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.agg_3, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 0235aa2fa..cbac8d607 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,25 +1,24 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': 
t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': 
NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': 
t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + PROJECT(columns={'agg_11': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': 
NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 056f09af0..07efb3875 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,27 +1,26 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', n_elements), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', n_unique_supplier_nations), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=n_elements > n_unique_supplier_nations, columns={'max_s_acctbal': max_s_acctbal, 'n_elements': n_elements, 'n_small_parts': sum_sum_agg_5, 'n_unique_supplier_nations': n_unique_supplier_nations, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'n_elements': DEFAULT_TO(sum_n_rows, 0:numeric), 'n_unique_supplier_nations': DEFAULT_TO(ndistinct_n_name, 0:numeric), 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': 
sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) - FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', 
DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_small_parts': sum_sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': 
t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 9d8b97da5..b061c0777 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,16 +1,15 @@ -ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', most_recent_order_total), ('most_recent_order_distinct', most_recent_order_distinct)], orderings=[(c_name):asc_first]) - FILTER(condition=most_recent_order_distinct < most_recent_order_total, columns={'c_name': c_name, 'most_recent_order_distinct': most_recent_order_distinct, 'most_recent_order_total': most_recent_order_total, 'o_orderdate': o_orderdate}) - PROJECT(columns={'c_name': c_name, 'most_recent_order_distinct': DEFAULT_TO(ndistinct_l_suppkey, 0:numeric), 'most_recent_order_total': DEFAULT_TO(n_rows, 0:numeric), 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) - FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_suppkey': NDISTINCT(l_suppkey)}) - FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) +ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', DEFAULT_TO(n_rows, 0:numeric)), ('most_recent_order_distinct', DEFAULT_TO(ndistinct_l_suppkey, 0:numeric))], orderings=[(c_name):asc_first]) + FILTER(condition=DEFAULT_TO(ndistinct_l_suppkey, 0:numeric) < DEFAULT_TO(n_rows, 0:numeric), columns={'c_name': c_name, 'n_rows': n_rows, 'ndistinct_l_suppkey': ndistinct_l_suppkey, 'o_orderdate': o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 
'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_suppkey': NDISTINCT(l_suppkey)}) + FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 0afce8592..5ecd14c7e 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,13 +1,10 @@ ROOT(columns=[('year', year_7), ('month', month_6), ('n_orders_in_range', n_orders_in_range)], orderings=[(year_7):asc_first, (month_6):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'month_6': ANYTHING(month), 'n_orders_in_range': COUNT(), 'year_7': ANYTHING(year)}) FILTER(condition=MONOTONIC(prev_month_avg_price, o_totalprice, 
avg_o_totalprice) | MONOTONIC(avg_o_totalprice, o_totalprice, prev_month_avg_price), columns={'month': month, 'year': year}) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) + JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate), type=INNER, cardinality=PLURAL_FILTER, columns={'avg_o_totalprice': t0.avg_o_totalprice, 'month': t0.month, 'o_totalprice': t1.o_totalprice, 'prev_month_avg_price': t0.prev_month_avg_price, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) - AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': year}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - PROJECT(columns={'month': MONTH(o_orderdate), 'o_totalprice': o_totalprice, 'year': year}) - FILTER(condition=year < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': year}) - PROJECT(columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice, 'year': YEAR(o_orderdate)}) + AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) + FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + 
FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 41ba7be61..740b284ba 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,11 +1,11 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year}) + JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) FILTER(condition=DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(DEFAULT_TO(sum_month_total_spent, 0:numeric), 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 
'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index d185fd519..c2708cd55 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,11 +1,10 @@ -ROOT(columns=[('name', n_name), ('suppliers_in_black', suppliers_in_black), ('total_suppliers', total_suppliers)], orderings=[]) - FILTER(condition=suppliers_in_black > 0.5:numeric * total_suppliers, columns={'n_name': n_name, 'suppliers_in_black': suppliers_in_black, 'total_suppliers': total_suppliers}) - PROJECT(columns={'n_name': n_name, 'suppliers_in_black': DEFAULT_TO(count_s_suppkey, 0:numeric), 'total_suppliers': total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) - FILTER(condition=s_acctbal > 0.0:numeric, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) + FILTER(condition=DEFAULT_TO(count_s_suppkey, 0:numeric) > 0.5:numeric * total_suppliers, columns={'count_s_suppkey': count_s_suppkey, 'n_name': n_name, 'total_suppliers': total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) + FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'total_suppliers': COUNT(s_suppkey)}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 6c6ea66d7..9f0a06ac0 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,19 +1,17 @@ 
-ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', n_ticker_trans), ('n_type_trans', n_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t0.n_ticker_trans, 'n_ticker_type_trans': t0.n_ticker_type_trans, 'n_type_trans': t0.n_type_trans, 'sbTxId': t1.sbTxId}) - FILTER(condition=n_ticker_type_trans / n_type_trans < 0.2:numeric, columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': n_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': n_ticker_trans, 'n_ticker_type_trans': n_ticker_type_trans, 'n_type_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_trans': t1.n_ticker_trans, 'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - FILTER(condition=n_ticker_type_trans / n_ticker_trans > 0.8:numeric, columns={'n_ticker_trans': n_ticker_trans, 
'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - PROJECT(columns={'n_ticker_trans': DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric), 'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) +ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', DEFAULT_TO(sum_n_ticker_type_trans_1, 0:numeric)), ('n_type_trans', DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric))], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) + FILTER(condition=n_ticker_type_trans / DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) < 0.2:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': sum_n_ticker_type_trans_1}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 
'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + FILTER(condition=n_ticker_type_trans / DEFAULT_TO(sum_n_ticker_type_trans, 0:numeric) > 0.8:numeric, columns={'n_ticker_type_trans': n_ticker_type_trans, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType, 'sum_n_ticker_type_trans': sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index a4d8aee87..15f3a6a3e 100644 --- 
a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,6 +1,5 @@ -ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', retail_price_int), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_size': t0.p_size, 'retail_price_int': t0.retail_price_int}) - LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': retail_price_int}, orderings=[(retail_price_int):asc_first]) - PROJECT(columns={'p_partkey': p_partkey, 'p_size': p_size, 'retail_price_int': INTEGER(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) +ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', INTEGER(p_retailprice)), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_retailprice': t0.p_retailprice, 'p_size': t0.p_size}) + LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}, orderings=[(INTEGER(p_retailprice)):asc_first]) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': 
p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt index c3fd447c1..a0e6fdd19 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -4,8 +4,8 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) - FILTER(condition=RANKING(args=[], partition=[anything_p_size], order=[(total_qty):desc_first], allow_ties=False) == 1:numeric, columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'total_qty': total_qty}) - PROJECT(columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'total_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + PROJECT(columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'total_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) + FILTER(condition=RANKING(args=[], partition=[anything_p_size], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'anything_p_size': anything_p_size, 'o_orderpriority': o_orderpriority, 'sum_l_quantity': sum_l_quantity}) AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'p_size': p_size}, aggregations={'anything_p_size': ANYTHING(p_size), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': 
t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index f03df0318..8195645e6 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -2,8 +2,8 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_o JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(n_orders):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_orders': n_orders, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index f0f4f0fb3..e759b898f 100644 --- 
a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', quantity), ('n_shipments', n_rows)], orderings=[(quantity):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 'quantity': t1.quantity, 's_name': t0.s_name}) +ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(quantity):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': quantity}) - PROJECT(columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'quantity': DEFAULT_TO(sum_l_quantity, 0:numeric)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == 
t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) - FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) + FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': 
l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 0bb678bcf..694b21b3a 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,10 +1,9 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) - AGGREGATE(keys={'month': MONTH(ca_dt), 'year': year}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows, 'year': t0.year}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows, 'year': t0.year}) - FILTER(condition=ISIN(year, [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt, 'year': year}) - PROJECT(columns={'ca_dt': ca_dt, 'year': YEAR(ca_dt)}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + AGGREGATE(keys={'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) 
JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 45394da84..9413f086a 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,15 +1,14 @@ -ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[n_devices], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * n_devices - PREV(args=[n_devices], partition=[], order=[(year):asc_last]) / PREV(args=[n_devices], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', n_devices), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) - FILTER(condition=n_devices > 0:numeric, columns={'n_devices': n_devices, 'sum_n_rows': sum_n_rows, 'year': year}) - PROJECT(columns={'n_devices': DEFAULT_TO(sum_expr_3, 0:numeric), 'sum_n_rows': sum_n_rows, 'year': year}) - AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': 
t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) +ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_3, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_expr_3, 0:numeric)), ('incidents', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) + FILTER(condition=DEFAULT_TO(sum_expr_3, 0:numeric) > 0:numeric, columns={'sum_expr_3': sum_expr_3, 'sum_n_rows': sum_n_rows, 'year': year}) + AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'ca_dt': 
t0.ca_dt, 'n_rows': t1.n_rows}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) - SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) + SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) + AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) + SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 8b4fb4c9e..411981af0 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,18 +1,17 @@ -ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', VALUE)], orderings=[(VALUE):desc_last], limit=10:numeric) - FILTER(condition=VALUE > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'VALUE': VALUE, 'ps_partkey': ps_partkey}) - PROJECT(columns={'VALUE': DEFAULT_TO(sum_expr_2, 0:numeric), 'ps_partkey': ps_partkey, 'sum_metric': sum_metric}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': 
t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) - SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(DEFAULT_TO(sum_expr_2, 0:numeric)):desc_last], limit=10:numeric) + FILTER(condition=DEFAULT_TO(sum_expr_2, 0:numeric) > DEFAULT_TO(sum_metric, 0:numeric) * 0.0001:numeric, columns={'ps_partkey': ps_partkey, 'sum_expr_2': sum_expr_2}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2, 'sum_metric': t0.sum_metric}) + AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == 
t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index ff041d5aa..8acc50868 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,9 +1,8 @@ -ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', TOTAL_QUANTITY)], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) - 
FILTER(condition=TOTAL_QUANTITY > 300:numeric, columns={'TOTAL_QUANTITY': TOTAL_QUANTITY, 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - PROJECT(columns={'TOTAL_QUANTITY': DEFAULT_TO(sum_l_quantity, 0:numeric), 'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) +ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + FILTER(condition=DEFAULT_TO(sum_l_quantity, 0:numeric) > 300:numeric, columns={'l_orderkey': l_orderkey, 'sum_l_quantity': sum_l_quantity}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 60e4e77d6..6d11fb64e 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,14 +1,13 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) - AGGREGATE(keys={'cntry_code': cntry_code}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'cntry_code': cntry_code}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'cntry_code': t0.cntry_code, 'n_rows': t1.n_rows}) - FILTER(condition=ISIN(cntry_code, ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': cntry_code}) - PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}) - FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) - JOIN(condition=True:bool, 
type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) - AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) - FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + FILTER(condition=c_acctbal > global_avg_balance, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone, 'global_avg_balance': t0.global_avg_balance}) + AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) + FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + FILTER(condition=ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index cafd78875..6ac06680b 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -9,7 +9,7 @@ WITH _s0 AS ( GROUP BY DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)), customer_id -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, _s0.quarter, @@ -25,7 +25,7 @@ SELECT quarter, state AS customer_state, COALESCE(sum_sum_sale_price, 0) AS total_sales -FROM _t2 +FROM _t1 WHERE NOT sum_sum_sale_price IS NULL AND sum_sum_sale_price > 0 ORDER BY diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index d9d271f6e..96ad10d92 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -25,7 +25,7 @@ WITH _s0 AS ( ) AS TEXT) || ' months' ), customer_id -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s0.sum_sale_price) AS sum_sum_sale_price, _s0.quarter, @@ -41,7 +41,7 @@ SELECT quarter, state AS customer_state, COALESCE(sum_sum_sale_price, 0) AS total_sales -FROM _t2 +FROM _t1 WHERE NOT sum_sum_sale_price IS NULL AND sum_sum_sale_price > 0 ORDER BY diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql index 1fd157a20..07f334d4d 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_ansi.sql @@ -1,10 +1,10 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT ca_dt FROM main.calendar WHERE EXTRACT(YEAR FROM CAST(ca_dt AS DATETIME)) IN (2020, 
2021) -), _t6 AS ( +), _t5 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t3 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t5.ca_dt - FROM _t3 AS _t5 + _t4.ca_dt + FROM _t2 AS _t4 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATE_ADD(CAST(_t5.ca_dt AS TIMESTAMP), -6, 'MONTH') + ON calendar.ca_dt >= DATE_ADD(CAST(_t4.ca_dt AS TIMESTAMP), -6, 'MONTH') JOIN main.devices AS devices ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t5 AS _t5 + ON _t5.co_id = devices.de_production_country_id GROUP BY - _t5.ca_dt + _t4.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t8.ca_dt - FROM _t3 AS _t8 + _t7.ca_dt + FROM _t2 AS _t7 JOIN main.incidents AS incidents - ON _t8.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + ON _t7.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t5 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t8.ca_dt + _t7.ca_dt ) SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t2.ca_dt AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME))), ( + WHEN LENGTH(EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME))), ( 2 * -1 )) END @@ -53,13 +53,13 @@ SELECT ROUND(( 1000000.0 * COALESCE(SUM(_s15.n_rows), 0) ) / COALESCE(SUM(_s7.n_rows), 0), 2) AS ir -FROM _t3 AS _t3 +FROM _t2 AS _t2 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = 
_t2.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t2.ca_dt GROUP BY - EXTRACT(MONTH FROM CAST(_t3.ca_dt AS DATETIME)), - EXTRACT(YEAR FROM CAST(_t3.ca_dt AS DATETIME)) + EXTRACT(MONTH FROM CAST(_t2.ca_dt AS DATETIME)), + EXTRACT(YEAR FROM CAST(_t2.ca_dt AS DATETIME)) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql index b30eff167..221bed4db 100644 --- a/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_sqlite.sql @@ -1,10 +1,10 @@ -WITH _t3 AS ( +WITH _t2 AS ( SELECT ca_dt FROM main.calendar WHERE CAST(STRFTIME('%Y', ca_dt) AS INTEGER) IN (2020, 2021) -), _t6 AS ( +), _t5 AS ( SELECT co_id, co_name @@ -14,38 +14,38 @@ WITH _t3 AS ( ), _s7 AS ( SELECT COUNT(*) AS n_rows, - _t5.ca_dt - FROM _t3 AS _t5 + _t4.ca_dt + FROM _t2 AS _t4 JOIN main.calendar AS calendar - ON calendar.ca_dt >= DATETIME(_t5.ca_dt, '-6 month') + ON calendar.ca_dt >= DATETIME(_t4.ca_dt, '-6 month') JOIN main.devices AS devices ON calendar.ca_dt = DATE(devices.de_purchase_ts, 'start of day') - JOIN _t6 AS _t6 - ON _t6.co_id = devices.de_production_country_id + JOIN _t5 AS _t5 + ON _t5.co_id = devices.de_production_country_id GROUP BY - _t5.ca_dt + _t4.ca_dt ), _s15 AS ( SELECT COUNT(*) AS n_rows, - _t8.ca_dt - FROM _t3 AS _t8 + _t7.ca_dt + FROM _t2 AS _t7 JOIN main.incidents AS incidents - ON _t8.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') + ON _t7.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') JOIN main.devices AS devices ON devices.de_id = incidents.in_device_id - JOIN _t6 AS _t9 - ON _t9.co_id = devices.de_production_country_id + JOIN _t5 AS _t8 + ON _t8.co_id = devices.de_production_country_id GROUP BY - _t8.ca_dt + _t7.ca_dt ) SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER), + CAST(STRFTIME('%Y', _t2.ca_dt) AS INTEGER), 
CASE - WHEN LENGTH(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER), ( 2 * -1 )) END @@ -56,13 +56,13 @@ SELECT ) AS REAL) / COALESCE(SUM(_s7.n_rows), 0), 2 ) AS ir -FROM _t3 AS _t3 +FROM _t2 AS _t2 LEFT JOIN _s7 AS _s7 - ON _s7.ca_dt = _t3.ca_dt + ON _s7.ca_dt = _t2.ca_dt LEFT JOIN _s15 AS _s15 - ON _s15.ca_dt = _t3.ca_dt + ON _s15.ca_dt = _t2.ca_dt GROUP BY - CAST(STRFTIME('%m', _t3.ca_dt) AS INTEGER), - CAST(STRFTIME('%Y', _t3.ca_dt) AS INTEGER) + CAST(STRFTIME('%Y', _t2.ca_dt) AS INTEGER), + CAST(STRFTIME('%m', _t2.ca_dt) AS INTEGER) ORDER BY month diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql index 1f40c54cb..461f9fb2c 100644 --- a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_ansi.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) GROUP BY _s4.ca_dt -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ -57,7 +57,7 @@ SELECT ) AS pct_incident_change, COALESCE(sum_expr_3, 0) AS bought, COALESCE(sum_n_rows, 0) AS incidents -FROM _t2 +FROM _t1 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql index 53b91a35c..d0661f3f0 100644 --- 
a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_sqlite.sql @@ -20,7 +20,7 @@ WITH _s2 AS ( ON _s4.ca_dt = DATE(incidents.in_error_report_ts, 'start of day') GROUP BY _s4.ca_dt -), _t2 AS ( +), _t1 AS ( SELECT SUM(_s3.n_rows) AS sum_expr_3, SUM(_s7.n_rows) AS sum_n_rows, @@ -57,7 +57,7 @@ SELECT ) AS pct_incident_change, COALESCE(sum_expr_3, 0) AS bought, COALESCE(sum_n_rows, 0) AS incidents -FROM _t2 +FROM _t1 WHERE NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 ORDER BY diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index 212223900..3f7009ea0 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -39,5 +39,5 @@ JOIN _s9 AS _s9 COALESCE(_s8.sum_metric, 0) * 0.0001 ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY - value DESC + COALESCE(_s9.sum_expr_2, 0) DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index 212223900..3f7009ea0 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -39,5 +39,5 @@ JOIN _s9 AS _s9 COALESCE(_s8.sum_metric, 0) * 0.0001 ) < COALESCE(_s9.sum_expr_2, 0) ORDER BY - value DESC + COALESCE(_s9.sum_expr_2, 0) DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q18_ansi.sql b/tests/test_sql_refsols/tpch_q18_ansi.sql index 575feb447..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_ansi.sql +++ b/tests/test_sql_refsols/tpch_q18_ansi.sql @@ -1,4 +1,4 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS 
customer ON customer.c_custkey = orders.o_custkey -JOIN _s3 AS _s3 - ON NOT _s3.sum_l_quantity IS NULL - AND _s3.l_orderkey = orders.o_orderkey - AND _s3.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate diff --git a/tests/test_sql_refsols/tpch_q18_sqlite.sql b/tests/test_sql_refsols/tpch_q18_sqlite.sql index 575feb447..aa9134752 100644 --- a/tests/test_sql_refsols/tpch_q18_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q18_sqlite.sql @@ -1,4 +1,4 @@ -WITH _s3 AS ( +WITH _t1 AS ( SELECT SUM(l_quantity) AS sum_l_quantity, l_orderkey @@ -12,14 +12,14 @@ SELECT orders.o_orderkey AS O_ORDERKEY, orders.o_orderdate AS O_ORDERDATE, orders.o_totalprice AS O_TOTALPRICE, - COALESCE(_s3.sum_l_quantity, 0) AS TOTAL_QUANTITY + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY FROM tpch.orders AS orders JOIN tpch.customer AS customer ON customer.c_custkey = orders.o_custkey -JOIN _s3 AS _s3 - ON NOT _s3.sum_l_quantity IS NULL - AND _s3.l_orderkey = orders.o_orderkey - AND _s3.sum_l_quantity > 300 +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 ORDER BY o_totalprice DESC, o_orderdate From 60be207cff381583392419c6b435778f2f21b7c9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 17:10:25 -0400 Subject: [PATCH 60/97] [RUN CI] From ade8f35b514f526102a7f8a2ea1964d032be15f0 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 18:08:54 -0400 Subject: [PATCH 61/97] Adding more simplification patterns and tests --- pydough/conversion/relational_converter.py | 16 +- .../conversion/relational_simplification.py | 183 ++++++++++++++++-- pydough/relational/rel_util.py | 22 ++- tests/test_pipeline_defog_custom.py | 122 ++++++++++++ .../agg_simplification_1.txt | 2 +- tests/test_plan_refsols/aggregate_anti.txt | 2 +- 
tests/test_plan_refsols/anti_aggregate.txt | 2 +- .../anti_aggregate_alternate.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 2 +- tests/test_plan_refsols/simplification_2.txt | 2 + tests/test_plan_refsols/simplification_3.txt | 3 + .../agg_simplification_1_ansi.sql | 2 +- .../agg_simplification_1_sqlite.sql | 2 +- .../simplification_2_ansi.sql | 41 ++++ .../simplification_2_sqlite.sql | 41 ++++ .../simplification_3_ansi.sql | 12 ++ .../simplification_3_sqlite.sql | 12 ++ 17 files changed, 438 insertions(+), 30 deletions(-) create mode 100644 tests/test_plan_refsols/simplification_2.txt create mode 100644 tests/test_plan_refsols/simplification_3.txt create mode 100644 tests/test_sql_refsols/simplification_2_ansi.sql create mode 100644 tests/test_sql_refsols/simplification_2_sqlite.sql create mode 100644 tests/test_sql_refsols/simplification_3_ansi.sql create mode 100644 tests/test_sql_refsols/simplification_3_sqlite.sql diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 71aaeab4e..0caf6c7e0 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1448,22 +1448,24 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: run projection pullup followed by column pruning 2x. + # Step 8: the following pipeline twice: + # A: projection pullup + # B: simplification + # C: filter pushdown + # D: column pruning for _ in range(2): root = confirm_root(pullup_projections(root)) simplify_expressions(root) + root._input = push_filters(root.input, set()) root = ColumnPruner().prune_unused_columns(root) - # Step 9: re-run filter pushdown - root._input = push_filters(root.input, set()) - - # Step 10: re-run projection merging, without pushing into joins. + # Step 9: re-run projection merging, without pushing into joins. 
root = confirm_root(merge_projects(root, push_into_joins=False)) - # Step 11: re-run column bubbling + # Step 10: re-run column bubbling root = bubble_column_names(root) - # Step 12: re-run column pruning. + # Step 11: re-run column pruning. root = ColumnPruner().prune_unused_columns(root) return root diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 70e8ef858..6d43d4e0e 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -153,6 +153,12 @@ def simplify_function_call( ): output_predicates.add(LogicalPredicate.NOT_NULL) case pydop.DEFAULT_TO: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] if LogicalPredicate.NOT_NULL in arg_predicates[0]: output_expr = expr.inputs[0] output_predicates = arg_predicates[0] @@ -171,22 +177,140 @@ def simplify_function_call( output_predicates = arg_predicates[0] else: output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - case ( - pydop.LENGTH - | pydop.BAN - | pydop.BOR - | pydop.BXR - | pydop.STARTSWITH - | pydop.ENDSWITH - | pydop.CONTAINS - | pydop.LIKE - | pydop.SQRT - | pydop.MONOTONIC - ): + case pydop.LENGTH: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + str_len: int = len(expr.inputs[0].value) + output_expr = LiteralExpression(str_len, expr.data_type) + if str_len > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.LOWER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( + expr.inputs[0].value.lower(), expr.data_type + ) + case pydop.UPPER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( 
+ expr.inputs[0].value.upper(), expr.data_type + ) + case pydop.STARTSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.startswith(expr.inputs[1].value), + expr.data_type, + ) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.ENDSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.endswith(expr.inputs[1].value), expr.data_type + ) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.CONTAINS: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[1].value in expr.inputs[0].value, expr.data_type + ) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.SQRT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, (int, float)) + and expr.inputs[0].value >= 0 + ): + sqrt_value: float = expr.inputs[0].value ** 0.5 + output_expr = LiteralExpression(sqrt_value, expr.data_type) + if sqrt_value > 0: + output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.MONOTONIC: + v0: int | float | None = None + v1: int | float | None = None + v2: int | float | None = None + monotonic_result: bool + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, (int, float) + ): + v0 = expr.inputs[0].value + if isinstance(expr.inputs[1], LiteralExpression) and isinstance( + expr.inputs[1].value, (int, 
float) + ): + v1 = expr.inputs[1].value + if isinstance(expr.inputs[2], LiteralExpression) and isinstance( + expr.inputs[2].value, (int, float) + ): + v2 = expr.inputs[2].value + if v0 is not None and v1 is not None and v2 is not None: + monotonic_result = (v0 <= v1) and (v1 <= v2) + output_expr = LiteralExpression(monotonic_result, expr.data_type) + if monotonic_result: + output_predicates.add(LogicalPredicate.POSITIVE) + elif v0 is not None and v1 is not None: + if v0 <= v1: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[1:] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + elif v1 is not None and v2 is not None: + if v1 <= v2: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[:2] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.BXR | pydop.LIKE: + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.BAN: + if any( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(False, expr.data_type) + if all( + isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + case pydop.BOR: + if any( + isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(True, expr.data_type) + if all( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(False, expr.data_type) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: - match (expr.op, expr.inputs[1]): - case (pydop.GRT, LiteralExpression()) if ( + match (expr.inputs[0], expr.op, 
expr.inputs[1]): + case (_, pydop.GRT, LiteralExpression()) if ( expr.inputs[1].value == 0 and LogicalPredicate.POSITIVE in arg_predicates[0] ): @@ -194,7 +318,7 @@ def simplify_function_call( output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) output_predicates.add(LogicalPredicate.POSITIVE) - case (pydop.GEQ, LiteralExpression()) if ( + case (_, pydop.GEQ, LiteralExpression()) if ( expr.inputs[1].value == 0 and LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] ): @@ -202,6 +326,35 @@ def simplify_function_call( output_predicates.add(LogicalPredicate.NOT_NULL) output_predicates.add(LogicalPredicate.NOT_NEGATIVE) output_predicates.add(LogicalPredicate.POSITIVE) + case (LiteralExpression(), _, LiteralExpression()): + match ( + expr.inputs[0].value, + expr.inputs[1].value, + expr.op, + ): + case (None, _, _) | (_, None, _): + output_expr = LiteralExpression(None, expr.data_type) + case (x, y, pydop.EQU): + output_expr = LiteralExpression(x == y, expr.data_type) + case (x, y, pydop.NEQ): + output_expr = LiteralExpression(x != y, expr.data_type) + case (x, y, pydop.LET) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x < y, expr.data_type) # type: ignore + case (x, y, pydop.LEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x <= y, expr.data_type) # type: ignore + case (x, y, pydop.GRT) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x > y, expr.data_type) # type: ignore + case (x, y, pydop.GEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x >= y, expr.data_type) # type: ignore + case _: pass output_predicates.add(LogicalPredicate.NOT_NEGATIVE) diff --git a/pydough/relational/rel_util.py 
b/pydough/relational/rel_util.py index 91e5919f9..12255731c 100644 --- a/pydough/relational/rel_util.py +++ b/pydough/relational/rel_util.py @@ -54,6 +54,15 @@ pydop.STARTSWITH, pydop.ENDSWITH, pydop.CONTAINS, + pydop.REPLACE, + pydop.FIND, + pydop.GETPART, + pydop.LPAD, + pydop.RPAD, + pydop.STRCOUNT, + pydop.INTEGER, + pydop.FLOAT, + pydop.STRING, pydop.LIKE, pydop.LOWER, pydop.UPPER, @@ -72,10 +81,21 @@ pydop.SUB, pydop.MUL, pydop.DIV, + pydop.ABS, + pydop.FLOOR, + pydop.LARGEST, + pydop.SMALLEST, + pydop.CEIL, + pydop.MONOTONIC, + pydop.POW, + pydop.POWER, + pydop.SQRT, + pydop.ROUND, + pydop.SLICE, } """ A set of operators with the property that the output is null if any of the -inputs are null. +column inputs are null. """ diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index 9f0a587c0..58265f72b 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1671,6 +1671,128 @@ def get_day_of_week( ), id="simplification_1", ), + pytest.param( + PyDoughPandasTest( + "result = Broker.CALCULATE(" + " s00 = DEFAULT_TO(None, 0) == 0," # -> True + " s01 = DEFAULT_TO(None, 0) != 0," # -> False + " s02 = DEFAULT_TO(None, 0) >= 0," # -> True + " s03 = DEFAULT_TO(None, 0) > 0," # -> False + " s04 = DEFAULT_TO(None, 0) <= 0," # -> True + " s05 = DEFAULT_TO(None, 0) < 0," # -> False + " s06 = DEFAULT_TO(None, 0) == None," # -> None + " s07 = DEFAULT_TO(None, 0) != None," # -> None + " s08 = DEFAULT_TO(None, 0) >= None," # -> None + " s09 = DEFAULT_TO(None, 0) > None," # -> None + " s10 = DEFAULT_TO(None, 0) <= None," # -> None + " s11 = DEFAULT_TO(None, 0) < None," # -> None + " s12 = DEFAULT_TO(None, 'ab') == 'cd'," # -> False + " s13 = DEFAULT_TO(None, 'ab') != 'cd'," # -> True + " s14 = DEFAULT_TO(None, 'ab') >= 'cd'," # -> False + " s15 = DEFAULT_TO(None, 'ab') > 'cd'," # -> False + " s16 = DEFAULT_TO(None, 'ab') <= 'cd'," # -> True + " s17 = DEFAULT_TO(None, 'ab') < 'cd'," # -> True + " s18 = 
True | (COUNT(customers) > 10)," # -> True + " s19 = False & (COUNT(customers) > 10)," # -> False + " s20 = False | (LENGTH('foo') > 0)," # -> True + " s21 = False | (LENGTH('foo') < 0)," # -> False + " s22 = True & (LENGTH('foo') > 0)," # -> True + " s23 = True & (LENGTH('foo') < 0)," # -> False + " s24 = STARTSWITH('a', 'abc')," # -> False + " s25 = STARTSWITH('abc', 'a')," # -> True + " s26 = ENDSWITH('abc', 'c')," # -> True + " s27 = ENDSWITH('abc', 'ab')," # -> False + " s28 = CONTAINS('abc', 'b')," # -> True + " s29 = CONTAINS('abc', 'B')," # -> False + " s30 = LENGTH('alphabet')," # -> 8 + " s31 = LOWER('AlPhAbEt')," # -> 'alphabet' + " s32 = UPPER('sOuP')," # -> 'SOUP' + " s33 = True == True," # -> True + " s34 = True != True," # -> False + " s35 = True == False," # -> False + " s36 = True != False," # -> True + " s37 = SQRT(9)," # -> 3.0 + ")", + "Broker", + lambda: pd.DataFrame( + { + "s00": [1], + "s01": [0], + "s02": [1], + "s03": [0], + "s04": [1], + "s05": [0], + "s06": [None], + "s07": [None], + "s08": [None], + "s09": [None], + "s10": [None], + "s11": [None], + "s12": [0], + "s13": [1], + "s14": [0], + "s15": [0], + "s16": [1], + "s17": [1], + "s18": [1], + "s19": [0], + "s20": [1], + "s21": [0], + "s22": [1], + "s23": [0], + "s24": [0], + "s25": [1], + "s26": [1], + "s27": [0], + "s28": [1], + "s29": [0], + "s30": [8], + "s31": ["alphabet"], + "s32": ["SOUP"], + "s33": [1], + "s34": [0], + "s35": [0], + "s36": [1], + "s37": [3.0], + } + ), + "simplification_2", + ), + id="simplification_2", + ), + pytest.param( + PyDoughPandasTest( + "result = Broker.CALCULATE(" + " s00 = MONOTONIC(1, 2, 3)," # -> True + " s01 = MONOTONIC(1, 1, 1)," # -> True + " s02 = MONOTONIC(1, 0, 3)," # -> False + " s03 = MONOTONIC(1, 4, 3)," # -> False + " s04 = MONOTONIC(1, 2, 1)," # -> False + " s05 = MONOTONIC(1, 0, 1)," # -> False + " s06 = MONOTONIC(1, LENGTH('foo'), COUNT(customers))," # -> 3 <= COUNT(customers) + " s07 = MONOTONIC(10, LENGTH('foo'), COUNT(customers))," 
# False + " s08 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 9)," # -> COUNT(customers) <= 6 + " s09 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 5)," # -> False + ")", + "Broker", + lambda: pd.DataFrame( + { + "s00": [1], + "s01": [1], + "s02": [0], + "s03": [0], + "s04": [0], + "s05": [0], + "s06": [1], + "s07": [0], + "s08": [0], + "s09": [0], + } + ), + "simplification_3", + ), + id="simplification_3", + ), ], ) def defog_custom_pipeline_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/agg_simplification_1.txt b/tests/test_plan_refsols/agg_simplification_1.txt index 03716b635..e067b9553 100644 --- a/tests/test_plan_refsols/agg_simplification_1.txt +++ b/tests/test_plan_refsols/agg_simplification_1.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', count_one * 2:numeric), ('su3', count_one * -1:numeric), ('su4', count_one * -3:numeric), ('su5', 0:numeric), ('su6', count_one * 0.5:numeric), ('su7', DEFAULT_TO(None:unknown, 0:numeric)), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 
2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) +ROOT(columns=[('aug_exchange', aug_exchange), ('su1', count_one), ('su2', count_one * 2:numeric), ('su3', count_one * -1:numeric), ('su4', count_one * -3:numeric), ('su5', 0:numeric), ('su6', count_one * 0.5:numeric), ('su7', 0:numeric), ('su8', DEFAULT_TO(aug_exchange, 0:numeric)), ('co1', count_one), ('co2', count_one), ('co3', count_one), ('co4', count_one), ('co5', count_one), ('co6', count_one), ('co7', 0:numeric), ('co8', count_one * INTEGER(PRESENT(aug_exchange))), ('nd1', 1:numeric), ('nd2', 1:numeric), ('nd3', 1:numeric), ('nd4', 1:numeric), ('nd5', 1:numeric), ('nd6', 1:numeric), ('nd7', 0:numeric), ('nd8', INTEGER(PRESENT(aug_exchange))), ('av1', 1:numeric), ('av2', 2:numeric), ('av3', -1:numeric), ('av4', -3:numeric), ('av5', 0:numeric), ('av6', 0.5:numeric), ('av7', None:unknown), ('av8', aug_exchange), ('mi1', 1:numeric), ('mi2', 2:numeric), ('mi3', -1:numeric), ('mi4', -3:numeric), ('mi5', 0:numeric), ('mi6', 0.5:numeric), ('mi7', None:unknown), ('mi8', aug_exchange), ('ma1', 1:numeric), ('ma2', 2:numeric), ('ma3', -1:numeric), ('ma4', -3:numeric), ('ma5', 0:numeric), ('ma6', 0.5:numeric), ('ma7', None:unknown), ('ma8', aug_exchange), ('an1', 1:numeric), ('an2', 2:numeric), ('an3', -1:numeric), ('an4', -3:numeric), ('an5', 0:numeric), ('an6', 0.5:numeric), ('an7', None:unknown), ('an8', aug_exchange), ('me1', 1:numeric), ('me2', 2:numeric), ('me3', -1:numeric), ('me4', -3:numeric), ('me5', 0:numeric), ('me6', 0.5:numeric), ('me7', 
None:unknown), ('me8', aug_exchange), ('qu1', 1:numeric), ('qu2', 2:numeric), ('qu3', -1:numeric), ('qu4', -3:numeric), ('qu5', 0:numeric), ('qu6', 0.5:numeric), ('qu7', None:unknown), ('qu8', agg_63)], orderings=[(aug_exchange):asc_first]) AGGREGATE(keys={'aug_exchange': LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string))}, aggregations={'agg_63': QUANTILE(LENGTH(KEEP_IF(sbTickerExchange, sbTickerExchange != 'NYSE Arca':string)), 0.8:numeric), 'count_one': COUNT()}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index e1482682d..02458ffdc 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index e1482682d..02458ffdc 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', 
0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index af1852c80..598407ce0 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', s_name), ('num_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('avg_price_of_10parts', DEFAULT_TO(None:unknown, 0:numeric)), ('sum_price_of_10parts', None:unknown)], orderings=[]) +ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_7.txt b/tests/test_plan_refsols/correl_7.txt index 6ba1a011d..a9c40b49d 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('name', r_name), ('n_prefix_nations', DEFAULT_TO(None:unknown, 0:numeric))], orderings=[]) +ROOT(columns=[('name', r_name), ('n_prefix_nations', 0:numeric)], orderings=[]) JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) JOIN(condition=SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_UNKNOWN, columns={'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/simplification_2.txt b/tests/test_plan_refsols/simplification_2.txt new file mode 100644 index 000000000..4e9433c17 --- /dev/null +++ b/tests/test_plan_refsols/simplification_2.txt @@ -0,0 +1,2 @@ +ROOT(columns=[('s00', True:bool), ('s01', False:bool), ('s02', True:bool), ('s03', False:bool), ('s04', True:bool), ('s05', False:bool), ('s06', None:bool), ('s07', None:bool), ('s08', None:bool), ('s09', None:bool), ('s10', None:bool), ('s11', None:bool), ('s12', False:bool), ('s13', True:bool), ('s14', False:bool), ('s15', False:bool), ('s16', True:bool), ('s17', True:bool), ('s18', True:bool), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', False:bool), ('s25', True:bool), ('s26', True:bool), ('s27', False:bool), ('s28', True:bool), ('s29', False:bool), ('s30', 8:numeric), ('s31', 'alphabet':string), ('s32', 'SOUP':string), ('s33', True:bool), ('s34', False:bool), ('s35', False:bool), ('s36', True:bool), ('s37', 3.0:numeric)], orderings=[]) + EMPTYSINGLETON() diff --git a/tests/test_plan_refsols/simplification_3.txt b/tests/test_plan_refsols/simplification_3.txt new file mode 100644 index 000000000..8078734b7 --- /dev/null +++ b/tests/test_plan_refsols/simplification_3.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=main.sbCustomer, columns={}) diff --git a/tests/test_sql_refsols/agg_simplification_1_ansi.sql 
b/tests/test_sql_refsols/agg_simplification_1_ansi.sql index 1807f6d2e..49b80edfc 100644 --- a/tests/test_sql_refsols/agg_simplification_1_ansi.sql +++ b/tests/test_sql_refsols/agg_simplification_1_ansi.sql @@ -6,7 +6,7 @@ SELECT COUNT(*) * -3 AS su4, 0 AS su5, COUNT(*) * 0.5 AS su6, - COALESCE(NULL, 0) AS su7, + 0 AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), 0 diff --git a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql index 515f11664..4ebaf317c 100644 --- a/tests/test_sql_refsols/agg_simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/agg_simplification_1_sqlite.sql @@ -159,7 +159,7 @@ SELECT COUNT(*) * -3 AS su4, 0 AS su5, COUNT(*) * 0.5 AS su6, - COALESCE(NULL, 0) AS su7, + 0 AS su7, COALESCE( LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END), 0 diff --git a/tests/test_sql_refsols/simplification_2_ansi.sql b/tests/test_sql_refsols/simplification_2_ansi.sql new file mode 100644 index 000000000..237dabec3 --- /dev/null +++ b/tests/test_sql_refsols/simplification_2_ansi.sql @@ -0,0 +1,41 @@ +SELECT + TRUE AS s00, + FALSE AS s01, + TRUE AS s02, + FALSE AS s03, + TRUE AS s04, + FALSE AS s05, + NULL AS s06, + NULL AS s07, + NULL AS s08, + NULL AS s09, + NULL AS s10, + NULL AS s11, + FALSE AS s12, + TRUE AS s13, + FALSE AS s14, + FALSE AS s15, + TRUE AS s16, + TRUE AS s17, + TRUE AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + FALSE AS s24, + TRUE AS s25, + TRUE AS s26, + FALSE AS s27, + TRUE AS s28, + FALSE AS s29, + 8 AS s30, + 'alphabet' AS s31, + 'SOUP' AS s32, + TRUE AS s33, + FALSE AS s34, + FALSE AS s35, + TRUE AS s36, + 3.0 AS s37 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/simplification_2_sqlite.sql b/tests/test_sql_refsols/simplification_2_sqlite.sql new file mode 100644 index 000000000..b5d5d71f5 --- /dev/null +++ 
b/tests/test_sql_refsols/simplification_2_sqlite.sql @@ -0,0 +1,41 @@ +SELECT + TRUE AS s00, + FALSE AS s01, + TRUE AS s02, + FALSE AS s03, + TRUE AS s04, + FALSE AS s05, + NULL AS s06, + NULL AS s07, + NULL AS s08, + NULL AS s09, + NULL AS s10, + NULL AS s11, + FALSE AS s12, + TRUE AS s13, + FALSE AS s14, + FALSE AS s15, + TRUE AS s16, + TRUE AS s17, + TRUE AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + FALSE AS s24, + TRUE AS s25, + TRUE AS s26, + FALSE AS s27, + TRUE AS s28, + FALSE AS s29, + 8 AS s30, + 'alphabet' AS s31, + 'SOUP' AS s32, + TRUE AS s33, + FALSE AS s34, + FALSE AS s35, + TRUE AS s36, + 3.0 AS s37 +FROM (VALUES + (NULL)) AS _q_0 diff --git a/tests/test_sql_refsols/simplification_3_ansi.sql b/tests/test_sql_refsols/simplification_3_ansi.sql new file mode 100644 index 000000000..3b49cd41b --- /dev/null +++ b/tests/test_sql_refsols/simplification_3_ansi.sql @@ -0,0 +1,12 @@ +SELECT + TRUE AS s00, + TRUE AS s01, + FALSE AS s02, + FALSE AS s03, + FALSE AS s04, + FALSE AS s05, + COUNT(*) >= 3 AS s06, + FALSE AS s07, + COUNT(*) <= 6 AS s08, + FALSE AS s09 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_3_sqlite.sql b/tests/test_sql_refsols/simplification_3_sqlite.sql new file mode 100644 index 000000000..3b49cd41b --- /dev/null +++ b/tests/test_sql_refsols/simplification_3_sqlite.sql @@ -0,0 +1,12 @@ +SELECT + TRUE AS s00, + TRUE AS s01, + FALSE AS s02, + FALSE AS s03, + FALSE AS s04, + FALSE AS s05, + COUNT(*) >= 3 AS s06, + FALSE AS s07, + COUNT(*) <= 6 AS s08, + FALSE AS s09 +FROM main.sbcustomer From 3d9167e76a0f74a51c01b75dc834ef2bd0fa9f74 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 21 Jul 2025 18:09:20 -0400 Subject: [PATCH 62/97] [RUN CI] From 3d17cad4793c21abb7c2cb6724c0632b4b56f1be Mon Sep 17 00:00:00 2001 From: knassre-bodo <105652923+knassre-bodo@users.noreply.github.com> Date: Tue, 22 Jul 2025 01:11:14 -0400 Subject: [PATCH 63/97] Update 
pydough/conversion/projection_pullup.py Co-authored-by: Hadia Ahmed --- pydough/conversion/projection_pullup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index a2c891a1b..55931e4d3 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -181,7 +181,7 @@ def pull_project_helper( ) # Iterate through the columns of the project to see which ones can be - # pulled up into the parent, dding them to a substitutions mapping that + # pulled up into the parent, adding them to a substitutions mapping that # will be used to apply the transformations. substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): From df2e4018553eb141f9c00b37aa6dd5bdab687e41 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 22 Jul 2025 01:30:49 -0400 Subject: [PATCH 64/97] Final revisions/documentation [RUN CI] --- pydough/conversion/merge_projects.py | 22 +++++++------- pydough/conversion/projection_pullup.py | 19 +++++++----- pydough/conversion/relational_converter.py | 34 +++++++++++++++------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/pydough/conversion/merge_projects.py b/pydough/conversion/merge_projects.py index 8c400cb86..a310aefef 100644 --- a/pydough/conversion/merge_projects.py +++ b/pydough/conversion/merge_projects.py @@ -197,18 +197,17 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: if isinstance(node, RelationalRoot): # Replace all column references in the root's columns with # the expressions from the child projection. 
- for idx, (name, expr) in enumerate(node.ordered_columns): - new_expr = transpose_expression(expr, child_project.columns) - node.columns[name] = new_expr - node.ordered_columns[idx] = (name, new_expr) + node._ordered_columns = [ + (name, transpose_expression(expr, node.input.columns)) + for name, expr in node.ordered_columns + ] + node._columns = dict(node.ordered_columns) # Do the same with the sort expressions. for idx, sort_info in enumerate(node.orderings): new_expr = transpose_expression( sort_info.expr, child_project.columns ) - node.orderings[idx] = ExpressionSortInfo( - new_expr, sort_info.ascending, sort_info.nulls_first - ) + node.orderings[idx].expr = new_expr # Delete the child projection from the tree, replacing it # with its input. node._input = child_project.input @@ -262,10 +261,11 @@ def merge_adjacent_projects(node: RelationalRoot | Project) -> RelationalNode: # If the orderings are the same, pull in the limit into the root. # Replace all column references in the root's columns with # the expressions from the child projection. - for idx, (name, expr) in enumerate(node.ordered_columns): - new_expr = transpose_expression(expr, node.input.columns) - node.columns[name] = new_expr - node.ordered_columns[idx] = (name, new_expr) + node._ordered_columns = [ + (name, transpose_expression(expr, node.input.columns)) + for name, expr in node.ordered_columns + ] + node._columns = dict(node.ordered_columns) node._orderings = new_orderings node._limit = node.input.limit # Delete the child projection from the tree, replacing it diff --git a/pydough/conversion/projection_pullup.py b/pydough/conversion/projection_pullup.py index 55931e4d3..0e4065fd1 100644 --- a/pydough/conversion/projection_pullup.py +++ b/pydough/conversion/projection_pullup.py @@ -64,7 +64,8 @@ def widen_columns( } # Pull all the columns from each input to the node into the node's output - # columns if they are not already in the node's output columns. 
+ # columns if they are not already in the node's output columns. Make sure + # not to include no-op mappings. for input_idx in range(len(node.inputs)): input_alias: str | None = node.default_input_aliases[input_idx] input_node: RelationalNode = node.inputs[input_idx] @@ -87,12 +88,13 @@ def widen_columns( new_ref: ColumnReference = ColumnReference(new_name, expr.data_type) node.columns[new_name] = ref_expr existing_vals[expr] = ref_expr - substitutions[ref_expr] = new_ref - else: + if ref_expr != new_ref: + substitutions[ref_expr] = new_ref + elif ref_expr != existing_vals[expr]: substitutions[ref_expr] = existing_vals[expr] - # Return the substitution mapping, without any no-op substitutions - return {k: v for k, v in substitutions.items() if k != v} + # Return the substitution mapping + return substitutions def pull_non_columns(node: Join | Filter | Limit) -> RelationalNode: @@ -181,7 +183,7 @@ def pull_project_helper( ) # Iterate through the columns of the project to see which ones can be - # pulled up into the parent, adding them to a substitutions mapping that + # pulled up into the parent, dding them to a substitutions mapping that # will be used to apply the transformations. substitutions: dict[RelationalExpression, RelationalExpression] = {} for name, expr in project.columns.items(): @@ -205,7 +207,8 @@ def pull_project_into_join(node: Join, input_index: int) -> None: Args: `node`: The Join node to pull the Project columns into. `input_index`: The index of the input to the Join node that should have - its columns pulled up, if it is a project node. + its columns pulled up, if it is a project node. This is assumed to be + either 0 (for the LHS) or 1 (for the RHS). """ # Skip if the input at the specified input is not a Project node. @@ -480,7 +483,7 @@ def pull_project_into_aggregate(node: Aggregate) -> RelationalNode: possible. This transformation is done in-place. Args: - `node`: The Filter node to pull the Project columns into. 
+ `node`: The Aggregate node to pull the Project columns into. """ if not isinstance(node.input, Project): return node diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index f66debc38..93303b7d8 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1417,7 +1417,7 @@ def optimize_relational_tree( Returns: The optimized relational root. """ - # Step 1: push filters down as far as possible + # Step 1: push filters down as far as possible. root._input = push_filters(root.input, set()) # Step 2: merge adjacent projections, unless it would result in excessive @@ -1432,10 +1432,14 @@ def optimize_relational_tree( # operating on already unique data. root = remove_redundant_aggs(root) - # Step 5: re-run projection merging. + # Step 5: re-run projection merging since the removal of redundant + # aggregations may have created redundant projections that can be deleted. root = confirm_root(merge_projects(root)) - # Step 6: re-run column pruning. + # Step 6: re-run column pruning after the various steps, which may have + # rendered more columns unused. This is done befre the next step to remove + # as many column names as possible so the column bubbling step can try to + # use nicer names without worrying about collisions. root = ColumnPruner().prune_unused_columns(root) # Step 7: bubble up names from the leaf nodes to further encourage simpler @@ -1443,21 +1447,31 @@ def optimize_relational_tree( # possible. root = bubble_column_names(root) - # Step 8: run projection pullup followed by column pruning 2x. + # Step 8: the following pipeline twice: + # A: projection pullup + # B: filter pushdown + # C: column pruning + # This is done because pullup will create more opportunities for filter + # pushdown, and the two together will create more opportunities for + # column pruning, the latter of which will unlock more opportunities for + # pullup and pushdown and so on. 
for _ in range(2): root = confirm_root(pullup_projections(root)) + root._input = push_filters(root.input, set()) root = ColumnPruner().prune_unused_columns(root) - # Step 9: re-run filter pushdown - root._input = push_filters(root.input, set()) - - # Step 10: re-run projection merging, without pushing into joins. + # Step 9: re-run projection merging, without pushing into joins. This + # will allow some redundant projections created by pullup to be removed + # entirely. root = confirm_root(merge_projects(root, push_into_joins=False)) - # Step 11: re-run column bubbling + # Step 10: re-run column bubbling to further simplify the final names of + # columns in the output now that more columns have been pruned, and delete + # any new duplicate columns that were created during the pullup step. root = bubble_column_names(root) - # Step 12: re-run column pruning. + # Step 11: re-run column pruning one last time to remove any columns that + # are no longer used after the final round of transformations. 
root = ColumnPruner().prune_unused_columns(root) return root From ce7e035a3f1a48045b40cb20567fbce37b288057 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 11:06:16 -0400 Subject: [PATCH 65/97] Completed refactor of how simplification predicates work to use a PredicateSet object --- .../conversion/relational_simplification.py | 455 ++++++++++++------ tests/test_plan_refsols/simplification_1.txt | 2 +- .../simplification_1_ansi.sql | 2 +- .../simplification_1_sqlite.sql | 2 +- 4 files changed, 323 insertions(+), 138 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 6d43d4e0e..341ff9a0d 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -5,7 +5,7 @@ __all__ = ["simplify_expressions"] -from enum import Enum +from dataclasses import dataclass import pydough.pydough_operators as pydop from pydough.relational import ( @@ -31,15 +31,81 @@ ) -class LogicalPredicate(Enum): +@dataclass +class PredicateSet: """ - Enum representing logical predicates that can be inferred about relational - expressions. + A set of logical predicates that can be inferred about relational + expressions and used to simplify other expressions. """ - NOT_NULL = "NOT_NULL" - NOT_NEGATIVE = "NOT_NEGATIVE" - POSITIVE = "POSITIVE" + not_null: bool = False + """ + Whether the expression is guaranteed to not be null. + """ + + not_negative: bool = False + """ + Whether the expression is guaranteed to not be negative. + """ + + positive: bool = False + """ + Whether the expression is guaranteed to be positive. + """ + + def __or__(self, other: "PredicateSet") -> "PredicateSet": + """ + Combines two predicate sets using a logical OR operation. 
+ """ + return PredicateSet( + not_null=self.not_null or other.not_null, + not_negative=self.not_negative or other.not_negative, + positive=self.positive or other.positive, + ) + + def __and__(self, other: "PredicateSet") -> "PredicateSet": + """ + Combines two predicate sets using a logical AND operation. + """ + return PredicateSet( + not_null=self.not_null and other.not_null, + not_negative=self.not_negative and other.not_negative, + positive=self.positive and other.positive, + ) + + def __sub__(self, other: "PredicateSet") -> "PredicateSet": + """ + Subtracts one predicate set from another. + """ + return PredicateSet( + not_null=self.not_null and not other.not_null, + not_negative=self.not_negative and not other.not_negative, + positive=self.positive and not other.positive, + ) + + @staticmethod + def union(predicates: list["PredicateSet"]) -> "PredicateSet": + """ + Computes the union of a list of predicate sets. + """ + result: PredicateSet = PredicateSet() + for pred in predicates[1:]: + result = result | pred + return result + + @staticmethod + def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": + """ + Computes the intersection of a list of predicate sets. 
+ """ + result: PredicateSet = PredicateSet() + if len(predicates) == 0: + return result + else: + result |= predicates[0] + for pred in predicates[1:]: + result = result & pred + return result NULL_PROPAGATING_OPS: set[pydop.PyDoughOperator] = { @@ -93,34 +159,50 @@ class LogicalPredicate(Enum): def simplify_function_call( expr: CallExpression, - arg_predicates: list[set[LogicalPredicate]], + arg_predicates: list[PredicateSet], no_group_aggregate: bool, -) -> tuple[RelationalExpression, set[LogicalPredicate]]: +) -> tuple[RelationalExpression, PredicateSet]: """ TODO """ output_expr: RelationalExpression = expr - output_predicates: set[LogicalPredicate] = set() + output_predicates: PredicateSet = PredicateSet() + union_set: PredicateSet = PredicateSet.union(arg_predicates) + intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) + + # If the call has null propagating rules, all of hte arguments are non-null, + # the output is guaranteed to be non-null. if expr.op in NULL_PROPAGATING_OPS: - if all(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): - output_predicates.add(LogicalPredicate.NOT_NULL) + if intersect_set.not_null: + output_predicates.not_null = True + match expr.op: case pydop.COUNT | pydop.NDISTINCT: - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null + # and non-negative. + output_predicates.not_null = True + output_predicates.not_negative = True + + # The output if COUNT(*) is positive if unless doing a no-groupby + # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null + # column. 
if not no_group_aggregate: - if ( - len(expr.inputs) == 0 - or LogicalPredicate.NOT_NULL in arg_predicates[0] - ): - output_predicates.add(LogicalPredicate.POSITIVE) + if len(expr.inputs) == 0 or arg_predicates[0].not_null: + output_predicates.positive = True + + # COUNT(x) where x is non-null can be rewritten as COUNT(*), which + # has the same positive rule as before. elif ( expr.op == pydop.COUNT and len(expr.inputs) == 1 - and LogicalPredicate.NOT_NULL in arg_predicates[0] + and arg_predicates[0].not_null ): - output_predicates.add(LogicalPredicate.POSITIVE) + if not no_group_aggregate: + output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) + + # All of these operators are non-null aor non-negative if their first + # argument is. case ( pydop.SUM | pydop.AVG @@ -130,53 +212,79 @@ def simplify_function_call( | pydop.MEDIAN | pydop.QUANTILE ): - for predicate in [ - LogicalPredicate.NOT_NEGATIVE, - LogicalPredicate.POSITIVE, - ]: - if predicate in arg_predicates[0]: - output_predicates.add(predicate) + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True + ) + + # The result of addition is non-negative or positive if all the + # operands are. It is also positive if all the operands are non-negative + # and at least one of them is positive. + case pydop.ADD: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True + ) + if intersect_set.not_negative and union_set.positive: + output_predicates.positive = True + + # The result of multiplication is non-negative or positive if all the + # operands are. + case pydop.MUL: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True + ) + + # The result of division is non-negative or positive if all the + # operands are, and is also non-null if both operands are non-null and + # the second operand is positive. 
+ case pydop.DIV: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True + ) if ( - LogicalPredicate.NOT_NULL in arg_predicates[0] - and not no_group_aggregate + arg_predicates[0].not_null + and arg_predicates[1].not_null + and arg_predicates[1].positive ): - output_predicates.add(LogicalPredicate.NOT_NULL) - case pydop.ADD | pydop.MUL | pydop.DIV: - for predicate in [LogicalPredicate.NOT_NEGATIVE, LogicalPredicate.POSITIVE]: - if all(predicate in preds for preds in arg_predicates): - output_predicates.add(predicate) - if expr.op == pydop.DIV: - if ( - LogicalPredicate.NOT_NULL in arg_predicates[0] - and LogicalPredicate.NOT_NULL in arg_predicates[1] - and LogicalPredicate.POSITIVE in arg_predicates[1] - ): - output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.not_null = True + case pydop.DEFAULT_TO: + # DEFAULT_TO(None, x) -> x if ( isinstance(expr.inputs[0], LiteralExpression) and expr.inputs[0].value is None ): - output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] - if LogicalPredicate.NOT_NULL in arg_predicates[0]: + if len(expr.inputs) == 2: + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + output_expr = CallExpression( + pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] + ) + output_predicates |= PredicateSet.intersect(arg_predicates[1:]) + + # DEFAULT_TO(x, y) -> x if x is non-null. + elif arg_predicates[0].not_null: output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] + output_predicates |= arg_predicates[0] + + # Otherwise, it is non-null if any of the arguments are non-null, + # and gains any predicates that all the arguments have in common. 
else: - if any(LogicalPredicate.NOT_NULL in preds for preds in arg_predicates): - output_predicates.add(LogicalPredicate.NOT_NULL) - for pred in arg_predicates[0]: - if all(pred in preds for preds in arg_predicates): - output_predicates.add(pred) + if union_set.not_null: + output_predicates.not_null = True + output_predicates |= intersect_set + + # ABS(x) -> x if x is positive or non-negative. At hte very least, we + # know it is always non-negative. case pydop.ABS: - if ( - LogicalPredicate.POSITIVE in arg_predicates[0] - or LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] - ): + if arg_predicates[0].not_negative or arg_predicates[0].positive: output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] + output_predicates |= arg_predicates[0] else: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # LENGTH(x) can be constant folded if x is a string literal. Otherwise, + # we know it is non-negative. case pydop.LENGTH: if isinstance(expr.inputs[0], LiteralExpression) and isinstance( expr.inputs[0].value, str @@ -184,8 +292,12 @@ def simplify_function_call( str_len: int = len(expr.inputs[0].value) output_expr = LiteralExpression(str_len, expr.data_type) if str_len > 0: - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive = True + output_predicates.not_negative = True + + # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant + # folded if the inputs are string literals. The boolean-returning + # operators are always non-negative. 
case pydop.LOWER: if isinstance(expr.inputs[0], LiteralExpression) and isinstance( expr.inputs[0].value, str @@ -211,7 +323,10 @@ def simplify_function_call( expr.inputs[0].value.startswith(expr.inputs[1].value), expr.data_type, ) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive |= expr.inputs[0].value.startswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True case pydop.ENDSWITH: if ( isinstance(expr.inputs[0], LiteralExpression) @@ -222,7 +337,10 @@ def simplify_function_call( output_expr = LiteralExpression( expr.inputs[0].value.endswith(expr.inputs[1].value), expr.data_type ) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive |= expr.inputs[0].value.endswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True case pydop.CONTAINS: if ( isinstance(expr.inputs[0], LiteralExpression) @@ -233,7 +351,13 @@ def simplify_function_call( output_expr = LiteralExpression( expr.inputs[1].value in expr.inputs[0].value, expr.data_type ) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive |= ( + expr.inputs[1].value in expr.inputs[0].value + ) + output_predicates.not_negative = True + + # SQRT(x) can be constant folded if x is a literal and non-negative. + # Otherwise, it is non-negative, and positive if x is positive. 
case pydop.SQRT: if ( isinstance(expr.inputs[0], LiteralExpression) @@ -242,9 +366,10 @@ def simplify_function_call( ): sqrt_value: float = expr.inputs[0].value ** 0.5 output_expr = LiteralExpression(sqrt_value, expr.data_type) - if sqrt_value > 0: - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if arg_predicates[0].positive: + output_predicates.positive = True + output_predicates.not_negative = True + case pydop.MONOTONIC: v0: int | float | None = None v1: int | float | None = None @@ -262,11 +387,17 @@ def simplify_function_call( expr.inputs[2].value, (int, float) ): v2 = expr.inputs[2].value + + # MONOTONIC(x, y, z), where x/y/z are all literals + # -> True if x <= y <= z, False otherwise if v0 is not None and v1 is not None and v2 is not None: monotonic_result = (v0 <= v1) and (v1 <= v2) output_expr = LiteralExpression(monotonic_result, expr.data_type) if monotonic_result: - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.positive = True + + # MONOTONIC(x, y, z), where x/y are literals + # -> if x <= y, then y <= z, otherwise False elif v0 is not None and v1 is not None: if v0 <= v1: output_expr = CallExpression( @@ -274,6 +405,9 @@ def simplify_function_call( ) else: output_expr = LiteralExpression(False, expr.data_type) + + # MONOTONIC(x, y, z), where y/z are literals + # -> if y <= z, then x <= y, otherwise False elif v1 is not None and v2 is not None: if v1 <= v2: output_expr = CallExpression( @@ -281,9 +415,14 @@ def simplify_function_call( ) else: output_expr = LiteralExpression(False, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # XOR and LIKE are always non-negative case pydop.BXR | pydop.LIKE: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # X & Y is False if any of the arguments are False-y literals, and True + # if all of the arguments are Truth-y 
literals. case pydop.BAN: if any( isinstance(arg, LiteralExpression) and arg.value in [0, False, None] @@ -295,7 +434,10 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # X | Y is True if any of the arguments are Truth-y literals, and False + # if all of the arguments are False-y literals. case pydop.BOR: if any( isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] @@ -307,25 +449,42 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(False, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: match (expr.inputs[0], expr.op, expr.inputs[1]): - case (_, pydop.GRT, LiteralExpression()) if ( - expr.inputs[1].value == 0 - and LogicalPredicate.POSITIVE in arg_predicates[0] + # x > y is True if x is positive and y is a literal that is + # zero or negative. The same goes for x >= y. + case (_, pydop.GRT, LiteralExpression()) | ( + _, + pydop.GEQ, + LiteralExpression(), + ) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].positive ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # x >= y is True if x is non-negative and y is a literal that is + # zero or negative. 
case (_, pydop.GEQ, LiteralExpression()) if ( - expr.inputs[1].value == 0 - and LogicalPredicate.NOT_NEGATIVE in arg_predicates[0] + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].not_negative ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # The rest of the case of x CMP y can be constant folded if both + # x and y are literals. case (LiteralExpression(), _, LiteralExpression()): match ( expr.inputs[0].value, @@ -357,88 +516,115 @@ def simplify_function_call( case _: pass - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True + + # PRESENT(x) is True if x is non-null. case pydop.PRESENT: - if LogicalPredicate.NOT_NULL in arg_predicates[0]: + if arg_predicates[0].not_null: output_expr = LiteralExpression(True, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # ABSENT(x) is True if x is null. case pydop.ABSENT: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # IFF(True, y, z) -> y (same if the first argument is guaranteed to be + # positive & non-null). + # IFF(False, y, z) -> z + # Otherwise, it inherits the intersection of the predicates of y and z. 
case pydop.IFF: if isinstance(expr.inputs[0], LiteralExpression): if bool(expr.inputs[0].value): output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] + output_predicates |= arg_predicates[1] else: output_expr = expr.inputs[2] - output_predicates = arg_predicates[2] - elif ( - LogicalPredicate.POSITIVE in arg_predicates[0] - and LogicalPredicate.NOT_NULL in arg_predicates[0] - ): + output_predicates |= arg_predicates[2] + elif arg_predicates[0].not_null and arg_predicates[0].positive: output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] + output_predicates |= arg_predicates[1] else: - output_predicates = arg_predicates[1] & arg_predicates[2] + output_predicates |= arg_predicates[1] & arg_predicates[2] + + # KEEP_IF(x, True) -> x + # KEEP_IF(x, False) -> None case pydop.KEEP_IF: if isinstance(expr.inputs[1], LiteralExpression): if bool(expr.inputs[1].value): output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] + output_predicates |= arg_predicates[0] else: output_expr = LiteralExpression(None, expr.data_type) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - elif ( - LogicalPredicate.POSITIVE in arg_predicates[1] - and LogicalPredicate.NOT_NULL in arg_predicates[1] - ): + output_predicates.not_negative = True + elif arg_predicates[1].not_null and arg_predicates[1].positive: output_expr = expr.inputs[0] output_predicates = arg_predicates[0] - elif LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + else: + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True + ) return output_expr, output_predicates def simplify_window_call( expr: WindowCallExpression, - arg_predicates: list[set[LogicalPredicate]], -) -> tuple[RelationalExpression, set[LogicalPredicate]]: + arg_predicates: list[PredicateSet], +) -> tuple[RelationalExpression, PredicateSet]: """ TODO """ - output_predicates: set[LogicalPredicate] = set() + 
output_predicates: PredicateSet = PredicateSet() no_frame: bool = not ( expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs ) match expr.op: + # RANKING & PERCENTILE are always non-null, non-negative, and positive. case pydop.RANKING | pydop.PERCENTILE: - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # RELSUM and RELAVG retain the properties of their argument, but become + # nullable if there is a frame. case pydop.RELSUM | pydop.RELAVG: - if LogicalPredicate.NOT_NULL in arg_predicates[0] and no_frame: - output_predicates.add(LogicalPredicate.NOT_NULL) - if LogicalPredicate.NOT_NEGATIVE in arg_predicates[0]: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) - if LogicalPredicate.POSITIVE in arg_predicates[0] and no_frame: - output_predicates.add(LogicalPredicate.POSITIVE) + if arg_predicates[0].not_null and no_frame: + output_predicates.not_null = True + if arg_predicates[0].not_negative: + output_predicates.not_negative = True + if arg_predicates[0].positive: + output_predicates.positive = True + + # RELSIZE is always non-negative, but is only non-null & positive if + # there is no frame. case pydop.RELSIZE: if no_frame: - output_predicates.add(LogicalPredicate.NOT_NULL) - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_null = True + output_predicates.positive = True + output_predicates.not_negative = True + + # RELCOUNT is always non-negative, but it is only non-null if there is + # no frame, and positive if there is no frame and the first argument + # is non-null. 
case pydop.RELCOUNT: if no_frame: - output_predicates.add(LogicalPredicate.NOT_NULL) - if LogicalPredicate.NOT_NULL in arg_predicates[0]: - output_predicates.add(LogicalPredicate.POSITIVE) - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_null = True + if arg_predicates[0].not_null: + output_predicates.positive = True + output_predicates.not_negative = True return expr, output_predicates -def infer_literal_predicates(expr: LiteralExpression) -> set[LogicalPredicate]: +def infer_literal_predicates(expr: LiteralExpression) -> PredicateSet: """ Infers logical predicates from a literal expression. @@ -448,22 +634,22 @@ def infer_literal_predicates(expr: LiteralExpression) -> set[LogicalPredicate]: Returns: A set of logical predicates inferred from the literal. """ - output_predicates: set[LogicalPredicate] = set() + output_predicates: PredicateSet = PredicateSet() if expr.value is not None: - output_predicates.add(LogicalPredicate.NOT_NULL) + output_predicates.not_null = True if isinstance(expr.value, (int, float)): if expr.value >= 0: - output_predicates.add(LogicalPredicate.NOT_NEGATIVE) + output_predicates.not_negative = True if expr.value > 0: - output_predicates.add(LogicalPredicate.POSITIVE) + output_predicates.positive = True return output_predicates def run_simplification( expr: RelationalExpression, - input_predicates: dict[RelationalExpression, set[LogicalPredicate]], + input_predicates: dict[RelationalExpression, PredicateSet], no_group_aggregate: bool, -) -> tuple[RelationalExpression, set[LogicalPredicate]]: +) -> tuple[RelationalExpression, PredicateSet]: """ Runs the simplification on a single expression, applying any predicates inferred from the input nodes to aid the process and inferring any new @@ -484,15 +670,15 @@ def run_simplification( new_args: list[RelationalExpression] new_partitions: list[RelationalExpression] new_orders: list[ExpressionSortInfo] - arg_predicates: list[set[LogicalPredicate]] - output_predicates: 
set[LogicalPredicate] = set() + arg_predicates: list[PredicateSet] + output_predicates: PredicateSet = PredicateSet() requires_rewrite: bool = False if isinstance(expr, LiteralExpression): output_predicates = infer_literal_predicates(expr) if isinstance(expr, ColumnReference): - output_predicates = input_predicates.get(expr, set()) + output_predicates = input_predicates.get(expr, PredicateSet()) if isinstance(expr, CallExpression): new_args = [] @@ -552,7 +738,7 @@ def run_simplification( def simplify_expressions( node: RelationalNode, -) -> dict[RelationalExpression, set[LogicalPredicate]]: +) -> dict[RelationalExpression, PredicateSet]: """ The main recursive procedure done to perform expression simplification on a relational node and its descendants. The transformation is done in-place @@ -564,7 +750,7 @@ def simplify_expressions( The predicates inferred from the output columns of the node. """ # Recursively invoke the procedure on all inputs to the node. - input_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + input_predicates: dict[RelationalExpression, PredicateSet] = {} for idx, input_node in enumerate(node.inputs): input_alias: str | None = node.default_input_aliases[idx] predicates = simplify_expressions(input_node) @@ -573,7 +759,7 @@ def simplify_expressions( # Transform the expressions of the current node in-place. 
ref_expr: RelationalExpression - output_predicates: dict[RelationalExpression, set[LogicalPredicate]] = {} + output_predicates: dict[RelationalExpression, PredicateSet] = {} match node: case ( Project() @@ -612,8 +798,7 @@ def simplify_expressions( isinstance(expr, ColumnReference) and expr.input_name != node.default_input_aliases[0] ): - preds.discard(LogicalPredicate.NOT_NULL) - preds.discard(LogicalPredicate.POSITIVE) + preds.not_null = False case Aggregate(): for name, expr in node.keys.items(): ref_expr = ColumnReference(name, expr.data_type) diff --git a/tests/test_plan_refsols/simplification_1.txt b/tests/test_plan_refsols/simplification_1.txt index 9a342e6c4..6089ea8c9 100644 --- a/tests/test_plan_refsols/simplification_1.txt +++ b/tests/test_plan_refsols/simplification_1.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('s00', 13:numeric), ('s01', 0:numeric), ('s02', n_rows), ('s03', n_rows + 5:numeric), ('s04', n_rows * 2:numeric), ('s05', n_rows / 8.0:numeric), ('s06', 10:numeric), ('s07', n_rows), ('s08', ABS(n_rows - 25:numeric)), ('s09', n_rows + 1:numeric), ('s10', n_rows - 3:numeric), ('s11', n_rows * -1:numeric), ('s12', n_rows / 2.5:numeric), ('s13', n_rows > 10:numeric), ('s14', n_rows >= 10:numeric), ('s15', n_rows == 20:numeric), ('s16', n_rows != 25:numeric), ('s17', n_rows < 25:numeric), ('s18', n_rows <= 25:numeric), ('s19', n_rows), ('s20', DEFAULT_TO(avg_expr_4, 0:numeric)), ('s21', True:bool), ('s22', True:bool), ('s23', True:bool)], orderings=[]) +ROOT(columns=[('s00', 13:numeric), ('s01', 0:numeric), ('s02', n_rows), ('s03', n_rows + 5:numeric), ('s04', n_rows * 2:numeric), ('s05', n_rows / 8.0:numeric), ('s06', 10:numeric), ('s07', n_rows), ('s08', ABS(n_rows - 25:numeric)), ('s09', n_rows + 1:numeric), ('s10', n_rows - 3:numeric), ('s11', n_rows * -1:numeric), ('s12', n_rows / 2.5:numeric), ('s13', n_rows > 10:numeric), ('s14', n_rows >= 10:numeric), ('s15', n_rows == 20:numeric), ('s16', n_rows != 25:numeric), ('s17', n_rows < 25:numeric), ('s18', 
n_rows <= 25:numeric), ('s19', n_rows), ('s20', avg_expr_4), ('s21', True:bool), ('s22', True:bool), ('s23', True:bool)], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_expr_4': AVG(DEFAULT_TO(LENGTH(sbCustName), 0:numeric)), 'n_rows': COUNT()}) SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_sql_refsols/simplification_1_ansi.sql b/tests/test_sql_refsols/simplification_1_ansi.sql index a07916fd6..9e7a4ecbe 100644 --- a/tests/test_sql_refsols/simplification_1_ansi.sql +++ b/tests/test_sql_refsols/simplification_1_ansi.sql @@ -19,7 +19,7 @@ SELECT COUNT(*) < 25 AS s17, COUNT(*) <= 25 AS s18, COUNT(*) AS s19, - COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + AVG(COALESCE(LENGTH(sbcustname), 0)) AS s20, TRUE AS s21, TRUE AS s22, TRUE AS s23 diff --git a/tests/test_sql_refsols/simplification_1_sqlite.sql b/tests/test_sql_refsols/simplification_1_sqlite.sql index 39f87c5f9..77364717e 100644 --- a/tests/test_sql_refsols/simplification_1_sqlite.sql +++ b/tests/test_sql_refsols/simplification_1_sqlite.sql @@ -19,7 +19,7 @@ SELECT COUNT(*) < 25 AS s17, COUNT(*) <= 25 AS s18, COUNT(*) AS s19, - COALESCE(AVG(COALESCE(LENGTH(sbcustname), 0)), 0) AS s20, + AVG(COALESCE(LENGTH(sbcustname), 0)) AS s20, TRUE AS s21, TRUE AS s22, TRUE AS s23 From e6c9fbe8c79662c11e4739d5920f06c79adb5ad5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 15:41:07 -0400 Subject: [PATCH 66/97] Refactoring to use shuttles & visitors for simplification --- .../conversion/relational_simplification.py | 1292 +++++++++-------- pydough/relational/__init__.py | 2 + .../relational_expressions/__init__.py | 2 + .../relational_nodes/relational_visitor.py | 3 +- 4 files changed, 692 insertions(+), 607 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 341ff9a0d..dc1f9e1b1 100644 --- a/pydough/conversion/relational_simplification.py +++ 
b/pydough/conversion/relational_simplification.py @@ -12,8 +12,8 @@ Aggregate, CallExpression, ColumnReference, + CorrelatedReference, EmptySingleton, - ExpressionSortInfo, Filter, Join, JoinType, @@ -21,8 +21,10 @@ LiteralExpression, Project, RelationalExpression, + RelationalExpressionShuttle, RelationalNode, RelationalRoot, + RelationalVisitor, Scan, WindowCallExpression, ) @@ -157,666 +159,744 @@ def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": } -def simplify_function_call( - expr: CallExpression, - arg_predicates: list[PredicateSet], - no_group_aggregate: bool, -) -> tuple[RelationalExpression, PredicateSet]: +class SimplificationShuttle(RelationalExpressionShuttle): """ TODO """ - output_expr: RelationalExpression = expr - output_predicates: PredicateSet = PredicateSet() - union_set: PredicateSet = PredicateSet.union(arg_predicates) - intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) - - # If the call has null propagating rules, all of hte arguments are non-null, - # the output is guaranteed to be non-null. - if expr.op in NULL_PROPAGATING_OPS: - if intersect_set.not_null: - output_predicates.not_null = True - match expr.op: - case pydop.COUNT | pydop.NDISTINCT: - # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null - # and non-negative. - output_predicates.not_null = True - output_predicates.not_negative = True + def __init__(self): + self.stack: list[PredicateSet] = [] + self._input_predicates: dict[RelationalExpression, PredicateSet] = {} + self._no_group_aggregate: bool = False - # The output if COUNT(*) is positive if unless doing a no-groupby - # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null - # column. - if not no_group_aggregate: - if len(expr.inputs) == 0 or arg_predicates[0].not_null: - output_predicates.positive = True + @property + def input_predicates(self) -> dict[RelationalExpression, PredicateSet]: + """ + Returns the input predicates that were passed to the shuttle. 
+ """ + return self._input_predicates - # COUNT(x) where x is non-null can be rewritten as COUNT(*), which - # has the same positive rule as before. - elif ( - expr.op == pydop.COUNT - and len(expr.inputs) == 1 - and arg_predicates[0].not_null - ): - if not no_group_aggregate: - output_predicates.positive = True - output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - - # All of these operators are non-null aor non-negative if their first - # argument is. - case ( - pydop.SUM - | pydop.AVG - | pydop.MIN - | pydop.MAX - | pydop.ANYTHING - | pydop.MEDIAN - | pydop.QUANTILE - ): - output_predicates |= arg_predicates[0] & PredicateSet( - not_null=True, not_negative=True - ) - - # The result of addition is non-negative or positive if all the - # operands are. It is also positive if all the operands are non-negative - # and at least one of them is positive. - case pydop.ADD: - output_predicates |= intersect_set & PredicateSet( - not_negative=True, positive=True - ) - if intersect_set.not_negative and union_set.positive: - output_predicates.positive = True - - # The result of multiplication is non-negative or positive if all the - # operands are. - case pydop.MUL: - output_predicates |= intersect_set & PredicateSet( - not_negative=True, positive=True - ) - - # The result of division is non-negative or positive if all the - # operands are, and is also non-null if both operands are non-null and - # the second operand is positive. - case pydop.DIV: - output_predicates |= intersect_set & PredicateSet( - not_negative=True, positive=True - ) - if ( - arg_predicates[0].not_null - and arg_predicates[1].not_null - and arg_predicates[1].positive - ): - output_predicates.not_null = True + @input_predicates.setter + def input_predicates(self, value: dict[RelationalExpression, PredicateSet]) -> None: + """ + Sets the input predicates for the shuttle. 
+ """ + self._input_predicates = value - case pydop.DEFAULT_TO: - # DEFAULT_TO(None, x) -> x - if ( - isinstance(expr.inputs[0], LiteralExpression) - and expr.inputs[0].value is None - ): - if len(expr.inputs) == 2: - output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] - else: - output_expr = CallExpression( - pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] - ) - output_predicates |= PredicateSet.intersect(arg_predicates[1:]) + @property + def no_group_aggregate(self) -> bool: + """ + Returns whether the shuttle currently a handling no-group-aggregate. + """ + return self._no_group_aggregate - # DEFAULT_TO(x, y) -> x if x is non-null. - elif arg_predicates[0].not_null: - output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] + @no_group_aggregate.setter + def no_group_aggregate(self, value: bool) -> None: + """ + Sets whether the shuttle is handling a no-group-aggregate. + """ + self._no_group_aggregate = value - # Otherwise, it is non-null if any of the arguments are non-null, - # and gains any predicates that all the arguments have in common. - else: - if union_set.not_null: - output_predicates.not_null = True - output_predicates |= intersect_set - - # ABS(x) -> x if x is positive or non-negative. At hte very least, we - # know it is always non-negative. 
- case pydop.ABS: - if arg_predicates[0].not_negative or arg_predicates[0].positive: - output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] - else: + def reset(self) -> None: + self.stack = [] + + def visit_literal_expression( + self, literal_expression: LiteralExpression + ) -> RelationalExpression: + output_predicates: PredicateSet = PredicateSet() + if literal_expression.value is not None: + output_predicates.not_null = True + if isinstance(literal_expression.value, (int, float)): + if literal_expression.value >= 0: + output_predicates.not_negative = True + if literal_expression.value > 0: + output_predicates.positive = True + self.stack.append(output_predicates) + return literal_expression + + def visit_column_reference( + self, column_reference: ColumnReference + ) -> RelationalExpression: + self.stack.append(self.input_predicates.get(column_reference, PredicateSet())) + return column_reference + + def visit_correlated_reference( + self, correlated_reference: CorrelatedReference + ) -> RelationalExpression: + self.stack.append(PredicateSet()) + return correlated_reference + + def visit_call_expression( + self, call_expression: CallExpression + ) -> RelationalExpression: + new_call = super().visit_call_expression(call_expression) + assert isinstance(new_call, CallExpression) + arg_predicates: list[PredicateSet] = [ + self.stack.pop() for _ in range(len(new_call.inputs)) + ] + arg_predicates.reverse() + return self.simplify_function_call( + new_call, arg_predicates, self.no_group_aggregate + ) + + def visit_window_expression( + self, window_expression: WindowCallExpression + ) -> RelationalExpression: + new_window = super().visit_window_expression(window_expression) + assert isinstance(new_window, WindowCallExpression) + for _ in range(len(new_window.order_inputs)): + self.stack.pop() + for _ in range(len(new_window.partition_inputs)): + self.stack.pop() + arg_predicates: list[PredicateSet] = [ + self.stack.pop() for _ in 
range(len(new_window.inputs)) + ] + arg_predicates.reverse() + return self.simplify_window_call(new_window, arg_predicates) + + def simplify_function_call( + self, + expr: CallExpression, + arg_predicates: list[PredicateSet], + no_group_aggregate: bool, + ) -> RelationalExpression: + """ + TODO + """ + output_expr: RelationalExpression = expr + output_predicates: PredicateSet = PredicateSet() + union_set: PredicateSet = PredicateSet.union(arg_predicates) + intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) + + # If the call has null propagating rules, all of hte arguments are non-null, + # the output is guaranteed to be non-null. + if expr.op in NULL_PROPAGATING_OPS: + if intersect_set.not_null: + output_predicates.not_null = True + + match expr.op: + case pydop.COUNT | pydop.NDISTINCT: + # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null + # and non-negative. + output_predicates.not_null = True output_predicates.not_negative = True - # LENGTH(x) can be constant folded if x is a string literal. Otherwise, - # we know it is non-negative. - case pydop.LENGTH: - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, str - ): - str_len: int = len(expr.inputs[0].value) - output_expr = LiteralExpression(str_len, expr.data_type) - if str_len > 0: - output_predicates.positive = True - output_predicates.not_negative = True - - # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant - # folded if the inputs are string literals. The boolean-returning - # operators are always non-negative. 
- case pydop.LOWER: - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, str - ): - output_expr = LiteralExpression( - expr.inputs[0].value.lower(), expr.data_type - ) - case pydop.UPPER: - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, str - ): - output_expr = LiteralExpression( - expr.inputs[0].value.upper(), expr.data_type - ) - case pydop.STARTSWITH: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, str) - and isinstance(expr.inputs[1], LiteralExpression) - and isinstance(expr.inputs[1].value, str) - ): - output_expr = LiteralExpression( - expr.inputs[0].value.startswith(expr.inputs[1].value), - expr.data_type, - ) - output_predicates.positive |= expr.inputs[0].value.startswith( - expr.inputs[1].value - ) - output_predicates.not_negative = True - case pydop.ENDSWITH: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, str) - and isinstance(expr.inputs[1], LiteralExpression) - and isinstance(expr.inputs[1].value, str) + # The output if COUNT(*) is positive if unless doing a no-groupby + # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null + # column. + if not no_group_aggregate: + if len(expr.inputs) == 0 or arg_predicates[0].not_null: + output_predicates.positive = True + + # COUNT(x) where x is non-null can be rewritten as COUNT(*), which + # has the same positive rule as before. + elif ( + expr.op == pydop.COUNT + and len(expr.inputs) == 1 + and arg_predicates[0].not_null + ): + if not no_group_aggregate: + output_predicates.positive = True + output_expr = CallExpression(pydop.COUNT, expr.data_type, []) + + # All of these operators are non-null aor non-negative if their first + # argument is. 
+ case ( + pydop.SUM + | pydop.AVG + | pydop.MIN + | pydop.MAX + | pydop.ANYTHING + | pydop.MEDIAN + | pydop.QUANTILE ): - output_expr = LiteralExpression( - expr.inputs[0].value.endswith(expr.inputs[1].value), expr.data_type + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True ) - output_predicates.positive |= expr.inputs[0].value.endswith( - expr.inputs[1].value + + # The result of addition is non-negative or positive if all the + # operands are. It is also positive if all the operands are non-negative + # and at least one of them is positive. + case pydop.ADD: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True ) - output_predicates.not_negative = True - case pydop.CONTAINS: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, str) - and isinstance(expr.inputs[1], LiteralExpression) - and isinstance(expr.inputs[1].value, str) - ): - output_expr = LiteralExpression( - expr.inputs[1].value in expr.inputs[0].value, expr.data_type + if intersect_set.not_negative and union_set.positive: + output_predicates.positive = True + + # The result of multiplication is non-negative or positive if all the + # operands are. + case pydop.MUL: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True ) - output_predicates.positive |= ( - expr.inputs[1].value in expr.inputs[0].value + + # The result of division is non-negative or positive if all the + # operands are, and is also non-null if both operands are non-null and + # the second operand is positive. + case pydop.DIV: + output_predicates |= intersect_set & PredicateSet( + not_negative=True, positive=True ) - output_predicates.not_negative = True - - # SQRT(x) can be constant folded if x is a literal and non-negative. - # Otherwise, it is non-negative, and positive if x is positive. 
- case pydop.SQRT: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and isinstance(expr.inputs[0].value, (int, float)) - and expr.inputs[0].value >= 0 - ): - sqrt_value: float = expr.inputs[0].value ** 0.5 - output_expr = LiteralExpression(sqrt_value, expr.data_type) - if arg_predicates[0].positive: - output_predicates.positive = True - output_predicates.not_negative = True - - case pydop.MONOTONIC: - v0: int | float | None = None - v1: int | float | None = None - v2: int | float | None = None - monotonic_result: bool - if isinstance(expr.inputs[0], LiteralExpression) and isinstance( - expr.inputs[0].value, (int, float) - ): - v0 = expr.inputs[0].value - if isinstance(expr.inputs[1], LiteralExpression) and isinstance( - expr.inputs[1].value, (int, float) - ): - v1 = expr.inputs[1].value - if isinstance(expr.inputs[2], LiteralExpression) and isinstance( - expr.inputs[2].value, (int, float) - ): - v2 = expr.inputs[2].value - - # MONOTONIC(x, y, z), where x/y/z are all literals - # -> True if x <= y <= z, False otherwise - if v0 is not None and v1 is not None and v2 is not None: - monotonic_result = (v0 <= v1) and (v1 <= v2) - output_expr = LiteralExpression(monotonic_result, expr.data_type) - if monotonic_result: - output_predicates.positive = True + if ( + arg_predicates[0].not_null + and arg_predicates[1].not_null + and arg_predicates[1].positive + ): + output_predicates.not_null = True - # MONOTONIC(x, y, z), where x/y are literals - # -> if x <= y, then y <= z, otherwise False - elif v0 is not None and v1 is not None: - if v0 <= v1: - output_expr = CallExpression( - pydop.LEQ, expr.data_type, expr.inputs[1:] - ) + case pydop.DEFAULT_TO: + # DEFAULT_TO(None, x) -> x + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + if len(expr.inputs) == 2: + output_expr = expr.inputs[1] + output_predicates = arg_predicates[1] + else: + output_expr = CallExpression( + pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] + ) + 
output_predicates |= PredicateSet.intersect(arg_predicates[1:]) + + # DEFAULT_TO(x, y) -> x if x is non-null. + elif arg_predicates[0].not_null: + output_expr = expr.inputs[0] + output_predicates |= arg_predicates[0] + + # Otherwise, it is non-null if any of the arguments are non-null, + # and gains any predicates that all the arguments have in common. else: - output_expr = LiteralExpression(False, expr.data_type) + if union_set.not_null: + output_predicates.not_null = True + output_predicates |= intersect_set + + # ABS(x) -> x if x is positive or non-negative. At hte very least, we + # know it is always non-negative. + case pydop.ABS: + if arg_predicates[0].not_negative or arg_predicates[0].positive: + output_expr = expr.inputs[0] + output_predicates |= arg_predicates[0] + else: + output_predicates.not_negative = True + + # LENGTH(x) can be constant folded if x is a string literal. Otherwise, + # we know it is non-negative. + case pydop.LENGTH: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + str_len: int = len(expr.inputs[0].value) + output_expr = LiteralExpression(str_len, expr.data_type) + if str_len > 0: + output_predicates.positive = True + output_predicates.not_negative = True - # MONOTONIC(x, y, z), where y/z are literals - # -> if y <= z, then x <= y, otherwise False - elif v1 is not None and v2 is not None: - if v1 <= v2: - output_expr = CallExpression( - pydop.LEQ, expr.data_type, expr.inputs[:2] + # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant + # folded if the inputs are string literals. The boolean-returning + # operators are always non-negative. 
+ case pydop.LOWER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( + expr.inputs[0].value.lower(), expr.data_type ) - else: + case pydop.UPPER: + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, str + ): + output_expr = LiteralExpression( + expr.inputs[0].value.upper(), expr.data_type + ) + case pydop.STARTSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.startswith(expr.inputs[1].value), + expr.data_type, + ) + output_predicates.positive |= expr.inputs[0].value.startswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True + case pydop.ENDSWITH: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[0].value.endswith(expr.inputs[1].value), + expr.data_type, + ) + output_predicates.positive |= expr.inputs[0].value.endswith( + expr.inputs[1].value + ) + output_predicates.not_negative = True + case pydop.CONTAINS: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, str) + and isinstance(expr.inputs[1], LiteralExpression) + and isinstance(expr.inputs[1].value, str) + ): + output_expr = LiteralExpression( + expr.inputs[1].value in expr.inputs[0].value, expr.data_type + ) + output_predicates.positive |= ( + expr.inputs[1].value in expr.inputs[0].value + ) + output_predicates.not_negative = True + + # SQRT(x) can be constant folded if x is a literal and non-negative. + # Otherwise, it is non-negative, and positive if x is positive. 
+ case pydop.SQRT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and isinstance(expr.inputs[0].value, (int, float)) + and expr.inputs[0].value >= 0 + ): + sqrt_value: float = expr.inputs[0].value ** 0.5 + output_expr = LiteralExpression(sqrt_value, expr.data_type) + if arg_predicates[0].positive: + output_predicates.positive = True + output_predicates.not_negative = True + + case pydop.MONOTONIC: + v0: int | float | None = None + v1: int | float | None = None + v2: int | float | None = None + monotonic_result: bool + if isinstance(expr.inputs[0], LiteralExpression) and isinstance( + expr.inputs[0].value, (int, float) + ): + v0 = expr.inputs[0].value + if isinstance(expr.inputs[1], LiteralExpression) and isinstance( + expr.inputs[1].value, (int, float) + ): + v1 = expr.inputs[1].value + if isinstance(expr.inputs[2], LiteralExpression) and isinstance( + expr.inputs[2].value, (int, float) + ): + v2 = expr.inputs[2].value + + # MONOTONIC(x, y, z), where x/y/z are all literals + # -> True if x <= y <= z, False otherwise + if v0 is not None and v1 is not None and v2 is not None: + monotonic_result = (v0 <= v1) and (v1 <= v2) + output_expr = LiteralExpression(monotonic_result, expr.data_type) + if monotonic_result: + output_predicates.positive = True + + # MONOTONIC(x, y, z), where x/y are literals + # -> if x <= y, then y <= z, otherwise False + elif v0 is not None and v1 is not None: + if v0 <= v1: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[1:] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + + # MONOTONIC(x, y, z), where y/z are literals + # -> if y <= z, then x <= y, otherwise False + elif v1 is not None and v2 is not None: + if v1 <= v2: + output_expr = CallExpression( + pydop.LEQ, expr.data_type, expr.inputs[:2] + ) + else: + output_expr = LiteralExpression(False, expr.data_type) + output_predicates.not_negative = True + + # XOR and LIKE are always non-negative + case pydop.BXR | pydop.LIKE: + 
output_predicates.not_negative = True + + # X & Y is False if any of the arguments are False-y literals, and True + # if all of the arguments are Truth-y literals. + case pydop.BAN: + if any( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): output_expr = LiteralExpression(False, expr.data_type) - output_predicates.not_negative = True - - # XOR and LIKE are always non-negative - case pydop.BXR | pydop.LIKE: - output_predicates.not_negative = True - - # X & Y is False if any of the arguments are False-y literals, and True - # if all of the arguments are Truth-y literals. - case pydop.BAN: - if any( - isinstance(arg, LiteralExpression) and arg.value in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(False, expr.data_type) - if all( - isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(True, expr.data_type) - output_predicates.not_negative = True - - # X | Y is True if any of the arguments are Truth-y literals, and False - # if all of the arguments are False-y literals. - case pydop.BOR: - if any( - isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(True, expr.data_type) - if all( - isinstance(arg, LiteralExpression) and arg.value in [0, False, None] - for arg in expr.inputs - ): - output_expr = LiteralExpression(False, expr.data_type) - output_predicates.not_negative = True - - case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: - match (expr.inputs[0], expr.op, expr.inputs[1]): - # x > y is True if x is positive and y is a literal that is - # zero or negative. The same goes for x >= y. 
- case (_, pydop.GRT, LiteralExpression()) | ( - _, - pydop.GEQ, - LiteralExpression(), - ) if ( - isinstance(expr.inputs[1].value, (int, float, bool)) - and expr.inputs[1].value <= 0 - and arg_predicates[0].not_null - and arg_predicates[0].positive + if all( + isinstance(arg, LiteralExpression) + and arg.value not in [0, False, None] + for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates |= PredicateSet( - not_null=True, not_negative=True, positive=True - ) + output_predicates.not_negative = True - # x >= y is True if x is non-negative and y is a literal that is - # zero or negative. - case (_, pydop.GEQ, LiteralExpression()) if ( - isinstance(expr.inputs[1].value, (int, float, bool)) - and expr.inputs[1].value <= 0 - and arg_predicates[0].not_null - and arg_predicates[0].not_negative + # X | Y is True if any of the arguments are Truth-y literals, and False + # if all of the arguments are False-y literals. + case pydop.BOR: + if any( + isinstance(arg, LiteralExpression) + and arg.value not in [0, False, None] + for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates |= PredicateSet( - not_null=True, not_negative=True, positive=True - ) + if all( + isinstance(arg, LiteralExpression) and arg.value in [0, False, None] + for arg in expr.inputs + ): + output_expr = LiteralExpression(False, expr.data_type) + output_predicates.not_negative = True - # The rest of the case of x CMP y can be constant folded if both - # x and y are literals. - case (LiteralExpression(), _, LiteralExpression()): - match ( - expr.inputs[0].value, - expr.inputs[1].value, - expr.op, + # NOT(x) is True if x is a False-y literal, and False if x is a + # Truth-y literal. 
+ case pydop.NOT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is not None + ): + output_expr = LiteralExpression( + not bool(expr.inputs[0].value), expr.data_type + ) + output_predicates.positive = not bool(expr.inputs[0].value) + output_predicates.not_negative = True + pass + + case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: + match (expr.inputs[0], expr.op, expr.inputs[1]): + # x > y is True if x is positive and y is a literal that is + # zero or negative. The same goes for x >= y. + case (_, pydop.GRT, LiteralExpression()) | ( + _, + pydop.GEQ, + LiteralExpression(), + ) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].positive ): - case (None, _, _) | (_, None, _): - output_expr = LiteralExpression(None, expr.data_type) - case (x, y, pydop.EQU): - output_expr = LiteralExpression(x == y, expr.data_type) - case (x, y, pydop.NEQ): - output_expr = LiteralExpression(x != y, expr.data_type) - case (x, y, pydop.LET) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x < y, expr.data_type) # type: ignore - case (x, y, pydop.LEQ) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x <= y, expr.data_type) # type: ignore - case (x, y, pydop.GRT) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x > y, expr.data_type) # type: ignore - case (x, y, pydop.GEQ) if isinstance( - x, (int, float, str, bool) - ) and isinstance(y, (int, float, str, bool)): - output_expr = LiteralExpression(x >= y, expr.data_type) # type: ignore - - case _: - pass - - output_predicates.not_negative = True - - # PRESENT(x) is True if x is non-null. 
- case pydop.PRESENT: - if arg_predicates[0].not_null: - output_expr = LiteralExpression(True, expr.data_type) - output_predicates.positive = True - output_predicates.not_null = True - output_predicates.not_negative = True + output_expr = LiteralExpression(True, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # x >= y is True if x is non-negative and y is a literal + # that is zero or negative. + case (_, pydop.GEQ, LiteralExpression()) if ( + isinstance(expr.inputs[1].value, (int, float, bool)) + and expr.inputs[1].value <= 0 + and arg_predicates[0].not_null + and arg_predicates[0].not_negative + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True + ) + + # The rest of the case of x CMP y can be constant folded if + # both x and y are literals. + case (LiteralExpression(), _, LiteralExpression()): + match ( + expr.inputs[0].value, + expr.inputs[1].value, + expr.op, + ): + case (None, _, _) | (_, None, _): + output_expr = LiteralExpression(None, expr.data_type) + case (x, y, pydop.EQU): + output_expr = LiteralExpression(x == y, expr.data_type) + case (x, y, pydop.NEQ): + output_expr = LiteralExpression(x != y, expr.data_type) + case (x, y, pydop.LET) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x < y, expr.data_type) # type: ignore + case (x, y, pydop.LEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x <= y, expr.data_type) # type: ignore + case (x, y, pydop.GRT) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr = LiteralExpression(x > y, expr.data_type) # type: ignore + case (x, y, pydop.GEQ) if isinstance( + x, (int, float, str, bool) + ) and isinstance(y, (int, float, str, bool)): + output_expr 
= LiteralExpression(x >= y, expr.data_type) # type: ignore + + case _: + pass - # ABSENT(x) is True if x is null. - case pydop.ABSENT: - if ( - isinstance(expr.inputs[0], LiteralExpression) - and expr.inputs[0].value is None - ): - output_expr = LiteralExpression(True, expr.data_type) - output_predicates.positive = True - output_predicates.not_null = True - output_predicates.not_negative = True - - # IFF(True, y, z) -> y (same if the first argument is guaranteed to be - # positive & non-null). - # IFF(False, y, z) -> z - # Otherwise, it inherits the intersection of the predicates of y and z. - case pydop.IFF: - if isinstance(expr.inputs[0], LiteralExpression): - if bool(expr.inputs[0].value): + output_predicates.not_negative = True + + # PRESENT(x) is True if x is non-null. + case pydop.PRESENT: + if arg_predicates[0].not_null: + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # ABSENT(x) is True if x is null. + case pydop.ABSENT: + if ( + isinstance(expr.inputs[0], LiteralExpression) + and expr.inputs[0].value is None + ): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + output_predicates.not_null = True + output_predicates.not_negative = True + + # IFF(True, y, z) -> y (same if the first argument is guaranteed to + # be positive & non-null). + # IFF(False, y, z) -> z + # Otherwise, uses the intersection of the predicates of y and z. 
+ case pydop.IFF: + if isinstance(expr.inputs[0], LiteralExpression): + if bool(expr.inputs[0].value): + output_expr = expr.inputs[1] + output_predicates |= arg_predicates[1] + else: + output_expr = expr.inputs[2] + output_predicates |= arg_predicates[2] + elif arg_predicates[0].not_null and arg_predicates[0].positive: output_expr = expr.inputs[1] output_predicates |= arg_predicates[1] else: - output_expr = expr.inputs[2] - output_predicates |= arg_predicates[2] - elif arg_predicates[0].not_null and arg_predicates[0].positive: - output_expr = expr.inputs[1] - output_predicates |= arg_predicates[1] - else: - output_predicates |= arg_predicates[1] & arg_predicates[2] - - # KEEP_IF(x, True) -> x - # KEEP_IF(x, False) -> None - case pydop.KEEP_IF: - if isinstance(expr.inputs[1], LiteralExpression): - if bool(expr.inputs[1].value): + output_predicates |= arg_predicates[1] & arg_predicates[2] + + # KEEP_IF(x, True) -> x + # KEEP_IF(x, False) -> None + case pydop.KEEP_IF: + if isinstance(expr.inputs[1], LiteralExpression): + if bool(expr.inputs[1].value): + output_expr = expr.inputs[0] + output_predicates |= arg_predicates[0] + else: + output_expr = LiteralExpression(None, expr.data_type) + output_predicates.not_negative = True + elif arg_predicates[1].not_null and arg_predicates[1].positive: output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] + output_predicates = arg_predicates[0] else: - output_expr = LiteralExpression(None, expr.data_type) - output_predicates.not_negative = True - elif arg_predicates[1].not_null and arg_predicates[1].positive: - output_expr = expr.inputs[0] - output_predicates = arg_predicates[0] - else: - output_predicates |= arg_predicates[0] & PredicateSet( - not_null=True, not_negative=True + output_predicates |= arg_predicates[0] & PredicateSet( + not_null=True, not_negative=True + ) + + self.stack.append(output_predicates) + return output_expr + + def simplify_window_call( + self, + expr: WindowCallExpression, + arg_predicates: 
list[PredicateSet], + ) -> RelationalExpression: + """ + TODO + """ + output_predicates: PredicateSet = PredicateSet() + output_expr: RelationalExpression = expr + no_frame: bool = not ( + expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs + ) + match expr.op: + # RANKING & PERCENTILE are always non-null, non-negative, and + # positive. + case pydop.RANKING | pydop.PERCENTILE: + output_predicates |= PredicateSet( + not_null=True, not_negative=True, positive=True ) - return output_expr, output_predicates + # RELSUM and RELAVG retain the properties of their argument, but + # become nullable if there is a frame. + case pydop.RELSUM | pydop.RELAVG: + if arg_predicates[0].not_null and no_frame: + output_predicates.not_null = True + if arg_predicates[0].not_negative: + output_predicates.not_negative = True + if arg_predicates[0].positive: + output_predicates.positive = True -def simplify_window_call( - expr: WindowCallExpression, - arg_predicates: list[PredicateSet], -) -> tuple[RelationalExpression, PredicateSet]: - """ - TODO - """ - output_predicates: PredicateSet = PredicateSet() - no_frame: bool = not ( - expr.kwargs.get("cumulative", False) or "frame" in expr.kwargs - ) - match expr.op: - # RANKING & PERCENTILE are always non-null, non-negative, and positive. - case pydop.RANKING | pydop.PERCENTILE: - output_predicates |= PredicateSet( - not_null=True, not_negative=True, positive=True - ) - - # RELSUM and RELAVG retain the properties of their argument, but become - # nullable if there is a frame. - case pydop.RELSUM | pydop.RELAVG: - if arg_predicates[0].not_null and no_frame: - output_predicates.not_null = True - if arg_predicates[0].not_negative: + # RELSIZE is always non-negative, but is only non-null & positive if + # there is no frame. 
+ case pydop.RELSIZE: + if no_frame: + output_predicates.not_null = True + output_predicates.positive = True output_predicates.not_negative = True - if arg_predicates[0].positive: - output_predicates.positive = True - # RELSIZE is always non-negative, but is only non-null & positive if - # there is no frame. - case pydop.RELSIZE: - if no_frame: - output_predicates.not_null = True - output_predicates.positive = True - output_predicates.not_negative = True - - # RELCOUNT is always non-negative, but it is only non-null if there is - # no frame, and positive if there is no frame and the first argument - # is non-null. - case pydop.RELCOUNT: - if no_frame: - output_predicates.not_null = True - if arg_predicates[0].not_null: - output_predicates.positive = True - output_predicates.not_negative = True - return expr, output_predicates + # RELCOUNT is always non-negative, but it is only non-null if there + # is no frame, and positive if there is no frame and the first + # argument is non-null. + case pydop.RELCOUNT: + if no_frame: + output_predicates.not_null = True + if arg_predicates[0].not_null: + output_predicates.positive = True + output_predicates.not_negative = True + + self.stack.append(output_predicates) + return output_expr -def infer_literal_predicates(expr: LiteralExpression) -> PredicateSet: +class SimplificationVisitor(RelationalVisitor): + """ + TODO """ - Infers logical predicates from a literal expression. - Args: - `expr`: The literal expression to infer predicates from. + def __init__(self): + self.stack: list[dict[RelationalExpression, PredicateSet]] = [] + self.shuttle: SimplificationShuttle = SimplificationShuttle() - Returns: - A set of logical predicates inferred from the literal. 
- """ - output_predicates: PredicateSet = PredicateSet() - if expr.value is not None: - output_predicates.not_null = True - if isinstance(expr.value, (int, float)): - if expr.value >= 0: - output_predicates.not_negative = True - if expr.value > 0: - output_predicates.positive = True - return output_predicates + def reset(self): + self.stack.clear() + self.shuttle.reset() + def get_input_predicates( + self, node: RelationalNode + ) -> dict[RelationalExpression, PredicateSet]: + """ + TODO + """ + # Recursively invoke the procedure on all inputs to the node. + self.visit_inputs(node) + + # For each input, pop the predicates from the stack and add them + # to the input predicates dictionary, using the appropriate input alias. + input_predicates: dict[RelationalExpression, PredicateSet] = {} + for i in reversed(range(len(node.inputs))): + input_alias: str | None = node.default_input_aliases[i] + predicates: dict[RelationalExpression, PredicateSet] = self.stack.pop() + for expr, preds in predicates.items(): + input_predicates[add_input_name(expr, input_alias)] = preds + + return input_predicates + + def generic_visit( + self, node: RelationalNode + ) -> dict[RelationalExpression, PredicateSet]: + """ + TODO + """ + input_predicates: dict[RelationalExpression, PredicateSet] = ( + self.get_input_predicates(node) + ) + self.shuttle.input_predicates = input_predicates + self.shuttle.no_group_aggregate = not ( + isinstance(node, Aggregate) and not node.keys + ) + # Transform the expressions of the current node in-place. 
+ ref_expr: RelationalExpression + output_predicates: dict[RelationalExpression, PredicateSet] = {} + for name, expr in node.columns.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.columns[name] = expr.accept_shuttle(self.shuttle) + output_predicates[ref_expr] = self.shuttle.stack.pop() + return output_predicates + + def visit_scan(self, node: Scan) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + self.stack.append(output_predicates) -def run_simplification( - expr: RelationalExpression, - input_predicates: dict[RelationalExpression, PredicateSet], - no_group_aggregate: bool, -) -> tuple[RelationalExpression, PredicateSet]: - """ - Runs the simplification on a single expression, applying any predicates - inferred from the input nodes to aid the process and inferring any new - predicates that apply to the resulting expression. + def visit_empty_singleton(self, node: EmptySingleton) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + self.stack.append(output_predicates) - Args: - `expr`: The expression to simplify. - `input_predicates`: A dictionary mapping input columns to the set of - predicates that are true for the column. - `no_group_aggregate`: A boolean indicating whether the expression is - part of an aggregate operation w/o keys, which affects how predicates - are inferred. - - Returns: - The simplified expression and a set of predicates that apply to the - resulting expression. 
- """ - new_args: list[RelationalExpression] - new_partitions: list[RelationalExpression] - new_orders: list[ExpressionSortInfo] - arg_predicates: list[PredicateSet] - output_predicates: PredicateSet = PredicateSet() - requires_rewrite: bool = False - - if isinstance(expr, LiteralExpression): - output_predicates = infer_literal_predicates(expr) - - if isinstance(expr, ColumnReference): - output_predicates = input_predicates.get(expr, PredicateSet()) - - if isinstance(expr, CallExpression): - new_args = [] - arg_predicates = [] - for arg in expr.inputs: - new_arg, new_preds = run_simplification( - arg, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_arg is not arg - new_args.append(new_arg) - arg_predicates.append(new_preds) - if requires_rewrite: - expr = CallExpression(expr.op, expr.data_type, new_args) - expr, output_predicates = simplify_function_call( - expr, arg_predicates, no_group_aggregate + def visit_project(self, node: Project) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) ) + self.stack.append(output_predicates) - if isinstance(expr, WindowCallExpression): - new_args = [] - new_partitions = [] - new_orders = [] - arg_predicates = [] - for arg in expr.inputs: - new_arg, new_preds = run_simplification( - arg, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_arg is not arg - new_args.append(new_arg) - arg_predicates.append(new_preds) - for partition in expr.partition_inputs: - new_partition, _ = run_simplification( - partition, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_partition is not partition - new_partitions.append(new_partition) - for order in expr.order_inputs: - new_order, _ = run_simplification( - order.expr, input_predicates, no_group_aggregate - ) - requires_rewrite |= new_order is not order.expr - new_orders.append( - ExpressionSortInfo(new_order, order.ascending, order.nulls_first) - ) - if requires_rewrite: - expr = 
WindowCallExpression( - expr.op, - expr.data_type, - new_args, - new_partitions, - new_orders, - expr.kwargs, - ) - expr, output_predicates = simplify_window_call(expr, arg_predicates) - - return expr, output_predicates + def visit_filter(self, node: Filter) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + # Transform the filter condition in-place. + node._condition = node.condition.accept_shuttle(self.shuttle) + self.stack.append(output_predicates) + + def visit_join(self, node: Join) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + # Transform the join condition in-place. + node._condition = node.condition.accept_shuttle(self.shuttle) + # If the join is not an inner join, remove any not-null predicates + # from the RHS of the join. + if node.join_type != JoinType.INNER: + for expr, preds in output_predicates.items(): + if ( + isinstance(expr, ColumnReference) + and expr.input_name != node.default_input_aliases[0] + ): + preds.not_null = False + self.stack.append(output_predicates) + + def visit_limit(self, node: Limit) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + # Transform the order keys in-place. + for ordering_expr in node.orderings: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.stack.append(output_predicates) + + def visit_root(self, node: RelationalRoot) -> None: + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) + ) + node._ordered_columns = [ + (name, node.columns[name]) for name, _ in node.ordered_columns + ] + # Transform the order keys in-place. 
+ for ordering_expr in node.orderings: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.stack.append(output_predicates) + + def visit_aggregate(self, node: Aggregate) -> None: + input_predicates: dict[RelationalExpression, PredicateSet] = ( + self.get_input_predicates(node) + ) + output_predicates: dict[RelationalExpression, PredicateSet] = {} + # Transform the keys & aggregates separately + self.shuttle.input_predicates = input_predicates + self.shuttle.no_group_aggregate = False + for name, expr in node.keys.items(): + ref_expr = ColumnReference(name, expr.data_type) + node.keys[name] = expr.accept_shuttle(self.shuttle) + output_predicates[ref_expr] = self.shuttle.stack.pop() + node.columns[name] = node.keys[name] + self.shuttle.no_group_aggregate = not node.keys + for name, expr in node.aggregations.items(): + ref_expr = ColumnReference(name, expr.data_type) + new_agg = expr.accept_shuttle(self.shuttle) + output_predicates[ref_expr] = self.shuttle.stack.pop() + assert isinstance(new_agg, CallExpression) + node.aggregations[name] = new_agg + node.columns[name] = node.aggregations[name] + self.stack.append(output_predicates) def simplify_expressions( node: RelationalNode, -) -> dict[RelationalExpression, PredicateSet]: +) -> None: """ - The main recursive procedure done to perform expression simplification on - a relational node and its descendants. The transformation is done in-place + Transforms the current node and all of its descendants in-place to simplify + any relational expressions. Args: `node`: The relational node to perform simplification on. - - Returns: - The predicates inferred from the output columns of the node. """ - # Recursively invoke the procedure on all inputs to the node. 
- input_predicates: dict[RelationalExpression, PredicateSet] = {} - for idx, input_node in enumerate(node.inputs): - input_alias: str | None = node.default_input_aliases[idx] - predicates = simplify_expressions(input_node) - for expr, preds in predicates.items(): - input_predicates[add_input_name(expr, input_alias)] = preds - - # Transform the expressions of the current node in-place. - ref_expr: RelationalExpression - output_predicates: dict[RelationalExpression, PredicateSet] = {} - match node: - case ( - Project() - | Filter() - | Join() - | Limit() - | RelationalRoot() - | Scan() - | EmptySingleton() - ): - for name, expr in node.columns.items(): - ref_expr = ColumnReference(name, expr.data_type) - node.columns[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates, False - ) - if isinstance(node, (Filter, Join)): - node._condition = run_simplification( - node.condition, input_predicates, False - )[0] - if isinstance(node, (RelationalRoot, Limit)): - node._orderings = [ - ExpressionSortInfo( - run_simplification(order_expr.expr, input_predicates, False)[0], - order_expr.ascending, - order_expr.nulls_first, - ) - for order_expr in node.orderings - ] - if isinstance(node, RelationalRoot): - node._ordered_columns = [ - (name, node.columns[name]) for name, _ in node.ordered_columns - ] - if isinstance(node, Join) and node.join_type != JoinType.INNER: - for expr, preds in output_predicates.items(): - if ( - isinstance(expr, ColumnReference) - and expr.input_name != node.default_input_aliases[0] - ): - preds.not_null = False - case Aggregate(): - for name, expr in node.keys.items(): - ref_expr = ColumnReference(name, expr.data_type) - node.keys[name], output_predicates[ref_expr] = run_simplification( - expr, input_predicates, False - ) - node.columns[name] = node.keys[name] - for name, expr in node.aggregations.items(): - ref_expr = ColumnReference(name, expr.data_type) - new_agg, output_predicates[ref_expr] = run_simplification( - expr, 
input_predicates, len(node.keys) == 0 - ) - assert isinstance(new_agg, CallExpression) - node.aggregations[name] = new_agg - node.columns[name] = node.aggregations[name] - - # For all other nodes, do not perform any simplification. - case _: - pass - - return output_predicates + simplifier: SimplificationVisitor = SimplificationVisitor() + node.accept(simplifier) diff --git a/pydough/relational/__init__.py b/pydough/relational/__init__.py index ff2cfb653..5f0e11839 100644 --- a/pydough/relational/__init__.py +++ b/pydough/relational/__init__.py @@ -19,6 +19,7 @@ "Project", "RelationalExpression", "RelationalExpressionDispatcher", + "RelationalExpressionShuttle", "RelationalExpressionVisitor", "RelationalNode", "RelationalRoot", @@ -37,6 +38,7 @@ ExpressionSortInfo, LiteralExpression, RelationalExpression, + RelationalExpressionShuttle, RelationalExpressionVisitor, WindowCallExpression, ) diff --git a/pydough/relational/relational_expressions/__init__.py b/pydough/relational/relational_expressions/__init__.py index 3eb8fc33d..487a043c3 100644 --- a/pydough/relational/relational_expressions/__init__.py +++ b/pydough/relational/relational_expressions/__init__.py @@ -14,6 +14,7 @@ "ExpressionSortInfo", "LiteralExpression", "RelationalExpression", + "RelationalExpressionShuttle", "RelationalExpressionVisitor", "WindowCallExpression", ] @@ -27,5 +28,6 @@ from .correlated_reference_finder import CorrelatedReferenceFinder from .expression_sort_info import ExpressionSortInfo from .literal_expression import LiteralExpression +from .relational_expression_shuttle import RelationalExpressionShuttle from .relational_expression_visitor import RelationalExpressionVisitor from .window_call_expression import WindowCallExpression diff --git a/pydough/relational/relational_nodes/relational_visitor.py b/pydough/relational/relational_nodes/relational_visitor.py index 7f8ebe79d..2a138b719 100644 --- a/pydough/relational/relational_nodes/relational_visitor.py +++ 
b/pydough/relational/relational_nodes/relational_visitor.py @@ -8,6 +8,7 @@ from abc import ABC, abstractmethod +from .abstract_node import RelationalNode from .aggregate import Aggregate from .empty_singleton import EmptySingleton from .filter import Filter @@ -36,7 +37,7 @@ def reset(self) -> None: Clear any internal state to allow reusing this visitor. """ - def visit_inputs(self, node) -> None: + def visit_inputs(self, node: RelationalNode) -> None: """ Visit all inputs of the provided node. This is a helper method to avoid repeating the same code in each visit method. From 94375ce8836606f9e23aba9ae33bb8b9c8fd55b2 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 15:41:58 -0400 Subject: [PATCH 67/97] Fixing comments --- .../conversion/relational_simplification.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index dc1f9e1b1..72883b709 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -268,28 +268,28 @@ def simplify_function_call( union_set: PredicateSet = PredicateSet.union(arg_predicates) intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) - # If the call has null propagating rules, all of hte arguments are non-null, - # the output is guaranteed to be non-null. + # If the call has null propagating rules, all of hte arguments are + # non-null, the output is guaranteed to be non-null. if expr.op in NULL_PROPAGATING_OPS: if intersect_set.not_null: output_predicates.not_null = True match expr.op: case pydop.COUNT | pydop.NDISTINCT: - # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be non-null - # and non-negative. + # COUNT(n), COUNT(*), and NDISTINCT(n) are guaranteed to be + # non-null and non-negative. 
output_predicates.not_null = True output_predicates.not_negative = True - # The output if COUNT(*) is positive if unless doing a no-groupby - # aggregation. Same goes for calling COUNT or NDISTINCT ona non-null - # column. + # The output if COUNT(*) is positive if unless doing a + # no-groupby aggregation. Same goes for calling COUNT or + # NDISTINCT ona non-null column. if not no_group_aggregate: if len(expr.inputs) == 0 or arg_predicates[0].not_null: output_predicates.positive = True - # COUNT(x) where x is non-null can be rewritten as COUNT(*), which - # has the same positive rule as before. + # COUNT(x) where x is non-null can be rewritten as COUNT(*), + # which has the same positive rule as before. elif ( expr.op == pydop.COUNT and len(expr.inputs) == 1 @@ -299,8 +299,8 @@ def simplify_function_call( output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - # All of these operators are non-null aor non-negative if their first - # argument is. + # All of these operators are non-null aor non-negative if their + # first argument is. case ( pydop.SUM | pydop.AVG @@ -315,8 +315,8 @@ def simplify_function_call( ) # The result of addition is non-negative or positive if all the - # operands are. It is also positive if all the operands are non-negative - # and at least one of them is positive. + # operands are. It is also positive if all the operands are + # non-negative and at least one of them is positive. case pydop.ADD: output_predicates |= intersect_set & PredicateSet( not_negative=True, positive=True @@ -324,16 +324,16 @@ def simplify_function_call( if intersect_set.not_negative and union_set.positive: output_predicates.positive = True - # The result of multiplication is non-negative or positive if all the - # operands are. + # The result of multiplication is non-negative or positive if all + # the operands are. 
case pydop.MUL: output_predicates |= intersect_set & PredicateSet( not_negative=True, positive=True ) # The result of division is non-negative or positive if all the - # operands are, and is also non-null if both operands are non-null and - # the second operand is positive. + # operands are, and is also non-null if both operands are non-null + # and the second operand is positive. case pydop.DIV: output_predicates |= intersect_set & PredicateSet( not_negative=True, positive=True From 4914784b7a7030f5ea69f4f9510eba938ef958ad Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 23 Jul 2025 15:44:35 -0400 Subject: [PATCH 68/97] [RUN CI] From f150cd587a35615fbb9c9cc2b37297e11e950f48 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:18:46 -0400 Subject: [PATCH 69/97] Adding docstrings --- .../conversion/relational_simplification.py | 121 +++++++++++++----- 1 file changed, 92 insertions(+), 29 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 72883b709..f098da6eb 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -161,7 +161,24 @@ def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": class SimplificationShuttle(RelationalExpressionShuttle): """ - TODO + Shuttle implementation for simplifying relational expressions. Has three + sources of state used to determine how to simplify expressions: + + - `input_predicates`: A dictionary mapping column references to + the corresponding predicate sets for all of the columns that are used as + inputs to all of the expressions in the current relational node (e.g. from + the inputs to the node). This needs to be set before the shuttle is + used, and the default is an empty dictionary. + - `no_group_aggregate`: A boolean indicating whether the current + transformation is being done within the context of an aggregation without + grouping keys. 
This is important because some aggregation functions will + have different behaviors with/without grouping keys. For example, COUNT(*) + is always positive if there are grouping keys, but if there are no + grouping keys, the answer could be 0. This needs to be set before the + shuttle is used, and the default is False. + - `stack`: A stack of predicate sets corresponding to all inputs to the + current expression. Used for simplifying function calls by first + simplifying their inputs and placing their predicate sets on the stack. """ def __init__(self): @@ -261,7 +278,22 @@ def simplify_function_call( no_group_aggregate: bool, ) -> RelationalExpression: """ - TODO + Procedure to simplify a function call expression based on the operator + and the predicates of its arguments. This assumes that the arguments + have already been simplified. + + Args: + `expr`: The CallExpression to simplify, whose arguments have already + been simplified. + `arg_predicates`: A list of PredicateSet objects corresponding to + the predicates of the arguments of the expression. + `no_group_aggregate`: Whether the expression is part of a no-group + aggregate. + + Returns: + The simplified expression with the predicates updated based on the + simplification rules. The predicates for the output are placed on + the stack. """ output_expr: RelationalExpression = expr output_predicates: PredicateSet = PredicateSet() @@ -698,7 +730,20 @@ def simplify_window_call( arg_predicates: list[PredicateSet], ) -> RelationalExpression: """ - TODO + Procedure to simplify a window call expression based on the operator + and the predicates of its arguments. This assumes that the arguments + have already been simplified. + + Args: + `expr`: The WindowCallExpression to simplify, whose arguments have + already been simplified. + `arg_predicates`: A list of PredicateSet objects corresponding to + the predicates of the arguments of the expression. 
+ + Returns: + The simplified expression with the predicates updated based on + the simplification rules. The predicates for the output are placed + on the stack. """ output_predicates: PredicateSet = PredicateSet() output_expr: RelationalExpression = expr @@ -747,7 +792,12 @@ def simplify_window_call( class SimplificationVisitor(RelationalVisitor): """ - TODO + Relational visitor implementation that simplifies relational expressions + within the relational tree and its subtrees in-place. The visitor first + transforms all the subtrees and collects predicate set information for the + output columns of each node, then uses those predicates to simplify the + expressions of the current node. The predicates for the output predicates of + the current node are placed on the stack. """ def __init__(self): @@ -762,11 +812,19 @@ def get_input_predicates( self, node: RelationalNode ) -> dict[RelationalExpression, PredicateSet]: """ - TODO + Recursively simplifies the inputs to the current node and collects + the predicates for each column from all of the inputs to the current + node. + + Args: + `node`: The current relational node whose inputs are being + simplified. + + Returns: + A dictionary mapping each input column reference from a column from + an input to the current node to the set of its inferred predicates. """ - # Recursively invoke the procedure on all inputs to the node. self.visit_inputs(node) - # For each input, pop the predicates from the stack and add them # to the input predicates dictionary, using the appropriate input alias. input_predicates: dict[RelationalExpression, PredicateSet] = {} @@ -782,14 +840,29 @@ def generic_visit( self, node: RelationalNode ) -> dict[RelationalExpression, PredicateSet]: """ - TODO + The generic pattern for relational simplification used by most of the + relational nodes as a base. 
It simplifies all descendants of the current + node, and uses the predicates from the inputs to transform all of the + expressions of the current node in-place. The predicates for the output + columns of the current node are returned as a dictionary mapping each + output column reference to its set of predicates. + + Args: + `node`: The current relational node to simplify. + + Returns: + A dictionary mapping each output column reference from the current + node to the set of its inferred predicates. """ + # Simplify the inputs to the current node and collect the predicates + # for each column from the inputs. input_predicates: dict[RelationalExpression, PredicateSet] = ( self.get_input_predicates(node) ) + # Set the input predicates and no-group-aggregate state for the shuttle. self.shuttle.input_predicates = input_predicates - self.shuttle.no_group_aggregate = not ( - isinstance(node, Aggregate) and not node.keys + self.shuttle.no_group_aggregate = ( + isinstance(node, Aggregate) and len(node.keys) == 0 ) # Transform the expressions of the current node in-place. 
ref_expr: RelationalExpression @@ -865,26 +938,16 @@ def visit_root(self, node: RelationalRoot) -> None: self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: - input_predicates: dict[RelationalExpression, PredicateSet] = ( - self.get_input_predicates(node) + output_predicates: dict[RelationalExpression, PredicateSet] = ( + self.generic_visit(node) ) - output_predicates: dict[RelationalExpression, PredicateSet] = {} - # Transform the keys & aggregates separately - self.shuttle.input_predicates = input_predicates - self.shuttle.no_group_aggregate = False - for name, expr in node.keys.items(): - ref_expr = ColumnReference(name, expr.data_type) - node.keys[name] = expr.accept_shuttle(self.shuttle) - output_predicates[ref_expr] = self.shuttle.stack.pop() - node.columns[name] = node.keys[name] - self.shuttle.no_group_aggregate = not node.keys - for name, expr in node.aggregations.items(): - ref_expr = ColumnReference(name, expr.data_type) - new_agg = expr.accept_shuttle(self.shuttle) - output_predicates[ref_expr] = self.shuttle.stack.pop() - assert isinstance(new_agg, CallExpression) - node.aggregations[name] = new_agg - node.columns[name] = node.aggregations[name] + # Transform the keys & aggregations to match the columns. 
+ for name in node.keys: + node.keys[name] = node.columns[name] + for name in node.aggregations: + expr = node.columns[name] + assert isinstance(expr, CallExpression) + node.aggregations[name] = expr self.stack.append(output_predicates) From 8d0fc6bc6281d3122a74646429c95946d5c05f44 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:25:44 -0400 Subject: [PATCH 70/97] Revisions --- .../conversion/relational_simplification.py | 70 ++++++++++--------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index f098da6eb..1cba39855 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -111,52 +111,56 @@ def intersect(predicates: list["PredicateSet"]) -> "PredicateSet": NULL_PROPAGATING_OPS: set[pydop.PyDoughOperator] = { + pydop.ABS, pydop.ADD, - pydop.SUB, - pydop.MUL, pydop.BAN, pydop.BOR, - pydop.NOT, - pydop.LOWER, - pydop.UPPER, - pydop.LENGTH, - pydop.STRIP, - pydop.REPLACE, - pydop.FIND, - pydop.ABS, + pydop.BXR, pydop.CEIL, - pydop.FLOOR, - pydop.ROUND, + pydop.CONTAINS, + pydop.DATEDIFF, + pydop.DAY, + pydop.DAYNAME, + pydop.DAYOFWEEK, + pydop.ENDSWITH, pydop.EQU, - pydop.NEQ, + pydop.FIND, + pydop.FLOOR, pydop.GEQ, pydop.GRT, - pydop.LET, + pydop.HOUR, + pydop.JOIN_STRINGS, + pydop.LARGEST, + pydop.LENGTH, pydop.LEQ, - pydop.BXR, - pydop.STARTSWITH, - pydop.ENDSWITH, - pydop.CONTAINS, + pydop.LET, pydop.LIKE, - pydop.SIGN, - pydop.SMALLEST, - pydop.LARGEST, - pydop.IFF, - pydop.YEAR, - pydop.MONTH, - pydop.DAY, - pydop.HOUR, + pydop.LOWER, + pydop.LPAD, pydop.MINUTE, + pydop.MONOTONIC, + pydop.MONTH, + pydop.MUL, + pydop.NEQ, + pydop.NOT, + pydop.REPLACE, + pydop.ROUND, + pydop.RPAD, pydop.SECOND, - pydop.DATEDIFF, - pydop.DAYNAME, - pydop.DAYOFWEEK, + pydop.SIGN, pydop.SLICE, - pydop.LPAD, - pydop.RPAD, - pydop.MONOTONIC, - pydop.JOIN_STRINGS, + pydop.SMALLEST, + 
pydop.STARTSWITH, + pydop.STRIP, + pydop.SUB, + pydop.UPPER, + pydop.YEAR, } +""" +A set of operators that only output null if one of the inputs is null. This set +is significant because it means that if all of the inputs to a function are +guaranteed to be non-null, the output is guaranteed to be non-null as well. +""" class SimplificationShuttle(RelationalExpressionShuttle): From 22a94ab9223aa1cfc58f90a5047ae50c178fa8cf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:30:51 -0400 Subject: [PATCH 71/97] Stack cleanup --- pydough/conversion/relational_simplification.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 1cba39855..cdaaff5a6 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -901,6 +901,7 @@ def visit_filter(self, node: Filter) -> None: ) # Transform the filter condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() self.stack.append(output_predicates) def visit_join(self, node: Join) -> None: @@ -909,6 +910,7 @@ def visit_join(self, node: Join) -> None: ) # Transform the join condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() # If the join is not an inner join, remove any not-null predicates # from the RHS of the join. if node.join_type != JoinType.INNER: @@ -927,6 +929,7 @@ def visit_limit(self, node: Limit) -> None: # Transform the order keys in-place. for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() self.stack.append(output_predicates) def visit_root(self, node: RelationalRoot) -> None: @@ -939,6 +942,7 @@ def visit_root(self, node: RelationalRoot) -> None: # Transform the order keys in-place. 
for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) + self.shuttle.stack.pop() self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: From a9716763b91edff7115ce3f0acd2acaed7d293a4 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:38:00 -0400 Subject: [PATCH 72/97] Adding additional shuttle framework --- pydough/conversion/relational_converter.py | 14 +++++++--- .../conversion/relational_simplification.py | 27 ++++++++++++++++--- .../relational_expression_shuttle.py | 6 +++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 6c1e86694..8ddf78c9a 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -43,6 +43,7 @@ LiteralExpression, Project, RelationalExpression, + RelationalExpressionShuttle, RelationalNode, RelationalRoot, Scan, @@ -1405,7 +1406,9 @@ def confirm_root(node: RelationalNode) -> RelationalRoot: def optimize_relational_tree( - root: RelationalRoot, configs: PyDoughConfigs + root: RelationalRoot, + configs: PyDoughConfigs, + additional_shuttles: list[RelationalExpressionShuttle], ) -> RelationalRoot: """ Runs optimize on the relational tree, including pushing down filters and @@ -1414,6 +1417,8 @@ def optimize_relational_tree( Args: `root`: the relational root to optimize. `configs`: the configuration settings to use during optimization. + `additional_shuttles`: additional relational expression shuttles to use + for expression simplification. Returns: The optimized relational root. @@ -1468,7 +1473,7 @@ def optimize_relational_tree( # pullup and pushdown and so on. 
for _ in range(2): root = confirm_root(pullup_projections(root)) - simplify_expressions(root) + simplify_expressions(root, additional_shuttles) root._input = push_filters(root.input, set()) root = ColumnPruner().prune_unused_columns(root) @@ -1535,6 +1540,9 @@ def convert_ast_to_relational( raw_result: RelationalRoot = postprocess_root(node, columns, hybrid, output) # Invoke the optimization procedures on the result to clean up the tree. - optimized_result: RelationalRoot = optimize_relational_tree(raw_result, configs) + additional_shuttles: list[RelationalExpressionShuttle] = [] + optimized_result: RelationalRoot = optimize_relational_tree( + raw_result, configs, additional_shuttles + ) return optimized_result diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index cdaaff5a6..f9d8ef4c4 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -804,13 +804,18 @@ class SimplificationVisitor(RelationalVisitor): the current node are placed on the stack. 
""" - def __init__(self): + def __init__(self, additional_shuttles: list[RelationalExpressionShuttle]): self.stack: list[dict[RelationalExpression, PredicateSet]] = [] self.shuttle: SimplificationShuttle = SimplificationShuttle() + self.additional_shuttles: list[RelationalExpressionShuttle] = ( + additional_shuttles + ) def reset(self): self.stack.clear() self.shuttle.reset() + for shuttle in self.additional_shuttles: + shuttle.reset() def get_input_predicates( self, node: RelationalNode @@ -873,8 +878,11 @@ def generic_visit( output_predicates: dict[RelationalExpression, PredicateSet] = {} for name, expr in node.columns.items(): ref_expr = ColumnReference(name, expr.data_type) - node.columns[name] = expr.accept_shuttle(self.shuttle) + expr = expr.accept_shuttle(self.shuttle) output_predicates[ref_expr] = self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + expr = expr.accept_shuttle(shuttle) + node.columns[name] = expr return output_predicates def visit_scan(self, node: Scan) -> None: @@ -902,6 +910,8 @@ def visit_filter(self, node: Filter) -> None: # Transform the filter condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + node._condition = node.condition.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_join(self, node: Join) -> None: @@ -911,6 +921,8 @@ def visit_join(self, node: Join) -> None: # Transform the join condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + node._condition = node.condition.accept_shuttle(shuttle) # If the join is not an inner join, remove any not-null predicates # from the RHS of the join. 
if node.join_type != JoinType.INNER: @@ -930,6 +942,8 @@ def visit_limit(self, node: Limit) -> None: for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_root(self, node: RelationalRoot) -> None: @@ -943,6 +957,8 @@ def visit_root(self, node: RelationalRoot) -> None: for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) self.shuttle.stack.pop() + for shuttle in self.additional_shuttles: + ordering_expr.expr = ordering_expr.expr.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: @@ -961,6 +977,7 @@ def visit_aggregate(self, node: Aggregate) -> None: def simplify_expressions( node: RelationalNode, + additional_shuttles: list[RelationalExpressionShuttle], ) -> None: """ Transforms the current node and all of its descendants in-place to simplify @@ -968,6 +985,10 @@ def simplify_expressions( Args: `node`: The relational node to perform simplification on. + `additional_shuttles`: A list of additional shuttles to apply to the + expressions of the node and its descendants. These shuttles are applied + after the simplification shuttle, and can be used to perform additional + transformations on the expressions. 
""" - simplifier: SimplificationVisitor = SimplificationVisitor() + simplifier: SimplificationVisitor = SimplificationVisitor(additional_shuttles) node.accept(simplifier) diff --git a/pydough/relational/relational_expressions/relational_expression_shuttle.py b/pydough/relational/relational_expressions/relational_expression_shuttle.py index d43642e35..e67326c41 100644 --- a/pydough/relational/relational_expressions/relational_expression_shuttle.py +++ b/pydough/relational/relational_expressions/relational_expression_shuttle.py @@ -23,6 +23,12 @@ class RelationalExpressionShuttle(ABC): at the end of each visit. """ + def reset(self): + """ + Reset the shuttle to its initial state. + This is useful if the shuttle is reused for multiple visits. + """ + def visit_call_expression( self, call_expression: CallExpression ) -> RelationalExpression: From 9c6caa21484780430af5fe4725c3a7e81e3778f3 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:45:39 -0400 Subject: [PATCH 73/97] [RUN CI] From 95e59d10f3a1b829de6d09fff472e1391fcf94d5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 24 Jul 2025 01:47:33 -0400 Subject: [PATCH 74/97] [RUN CI] From b63c5d40ad33a3b8937a1eee158315231249c110 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 31 Jul 2025 00:36:06 -0400 Subject: [PATCH 75/97] Added more simplfication patterns to tests --- .../conversion/relational_simplification.py | 53 ++++++----- tests/test_pipeline_defog_custom.py | 94 ++++++++++++++++++- tests/test_plan_refsols/simplification_2.txt | 5 +- tests/test_plan_refsols/simplification_3.txt | 7 +- .../simplification_2_ansi.sql | 15 ++- .../simplification_2_sqlite.sql | 15 ++- .../simplification_3_ansi.sql | 58 +++++++++++- .../simplification_3_sqlite.sql | 87 ++++++++++++++++- 8 files changed, 290 insertions(+), 44 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index f9d8ef4c4..a1e20c7df 100644 --- 
a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -382,31 +382,34 @@ def simplify_function_call( output_predicates.not_null = True case pydop.DEFAULT_TO: - # DEFAULT_TO(None, x) -> x - if ( - isinstance(expr.inputs[0], LiteralExpression) - and expr.inputs[0].value is None - ): - if len(expr.inputs) == 2: - output_expr = expr.inputs[1] - output_predicates = arg_predicates[1] - else: - output_expr = CallExpression( - pydop.DEFAULT_TO, expr.data_type, expr.inputs[1:] - ) - output_predicates |= PredicateSet.intersect(arg_predicates[1:]) - - # DEFAULT_TO(x, y) -> x if x is non-null. - elif arg_predicates[0].not_null: - output_expr = expr.inputs[0] - output_predicates |= arg_predicates[0] - - # Otherwise, it is non-null if any of the arguments are non-null, - # and gains any predicates that all the arguments have in common. + # Modify the list of arguments by removing any that are None, + # and stopping once we find the first argument that has is + # non-null. + new_args: list[RelationalExpression] = [] + new_predicates: list[PredicateSet] = [] + for i, arg in enumerate(expr.inputs): + if isinstance(arg, LiteralExpression) and arg.value is None: + continue + new_args.append(arg) + new_predicates.append(arg_predicates[i]) + if arg_predicates[i].not_null: + break + if len(new_args) == 0: + # If all inputs are None, the output is None. + output_expr = LiteralExpression(None, expr.data_type) + elif len(new_args) == 1: + # If there is only one input, the output is that input. + output_expr = new_args[0] + output_predicates |= new_predicates[0] else: - if union_set.not_null: + # If there are multiple inputs, the output is a new + # DEFAULT_TO expression with the non-None inputs. 
+ output_expr = CallExpression( + pydop.DEFAULT_TO, expr.data_type, new_args + ) + output_predicates = PredicateSet.intersect(new_predicates) + if PredicateSet.union(new_predicates).not_null: output_predicates.not_null = True - output_predicates |= intersect_set # ABS(x) -> x if x is positive or non-negative. At hte very least, we # know it is always non-negative. @@ -552,8 +555,8 @@ def simplify_function_call( output_expr = LiteralExpression(False, expr.data_type) output_predicates.not_negative = True - # XOR and LIKE are always non-negative - case pydop.BXR | pydop.LIKE: + # LIKE is always non-negative + case pydop.LIKE: output_predicates.not_negative = True # X & Y is False if any of the arguments are False-y literals, and True diff --git a/tests/test_pipeline_defog_custom.py b/tests/test_pipeline_defog_custom.py index 58265f72b..f85afa9c2 100644 --- a/tests/test_pipeline_defog_custom.py +++ b/tests/test_pipeline_defog_custom.py @@ -1712,6 +1712,16 @@ def get_day_of_week( " s35 = True == False," # -> False " s36 = True != False," # -> True " s37 = SQRT(9)," # -> 3.0 + " s38 = COUNT(customers) == None," # -> None + " s39 = None >= COUNT(customers)," # -> None + " s40 = COUNT(customers) > None," # -> None + " s41 = None < COUNT(customers)," # -> None + " s42 = None <= COUNT(customers)," # -> None + " s43 = None + COUNT(customers)," # -> None + " s44 = COUNT(customers) - None," # -> None + " s45 = None * COUNT(customers)," # -> None + " s46 = COUNT(customers) / None," # -> None + " s47 = ABS(DEFAULT_TO(LIKE(DEFAULT_TO(MAX(customers.name), ''), '%r%'), 1))" # -> COALESCE(MAX(sbcustname), '') LIKE '%r%' ")", "Broker", lambda: pd.DataFrame( @@ -1754,6 +1764,16 @@ def get_day_of_week( "s35": [0], "s36": [1], "s37": [3.0], + "s38": [None], + "s39": [None], + "s40": [None], + "s41": [None], + "s42": [None], + "s43": [None], + "s44": [None], + "s45": [None], + "s46": [None], + "s47": [1], } ), "simplification_2", @@ -1762,6 +1782,18 @@ def get_day_of_week( ), 
pytest.param( PyDoughPandasTest( + "cust_info = customers.CALCULATE(p=DEFAULT_TO(INTEGER(postal_code), 0))" + " .CALCULATE(" + " rank = RANKING(by=name.ASC())," + " rsum1 = DEFAULT_TO(RELSUM(ABS(p)), 0.1)," + " rsum2 = DEFAULT_TO(RELSUM(ABS(p), by=name.ASC(), cumulative=True), 0.1)," + " ravg1 = DEFAULT_TO(RELAVG(ABS(p)), 0.1)," + " ravg2 = DEFAULT_TO(RELAVG(ABS(p), by=name.ASC(), frame=(None, -1)), 0.1)," + " rcnt1 = DEFAULT_TO(RELCOUNT(INTEGER(postal_code)), 0.1)," + " rcnt2 = DEFAULT_TO(RELCOUNT(INTEGER(postal_code), by=name.ASC(), cumulative=True), 0.1)," + " rsiz1 = DEFAULT_TO(RELSIZE(), 0.1)," + " rsiz2 = DEFAULT_TO(RELSIZE(by=name.ASC(), frame=(1, None)), 0.1)," + ")\n" "result = Broker.CALCULATE(" " s00 = MONOTONIC(1, 2, 3)," # -> True " s01 = MONOTONIC(1, 1, 1)," # -> True @@ -1769,10 +1801,37 @@ def get_day_of_week( " s03 = MONOTONIC(1, 4, 3)," # -> False " s04 = MONOTONIC(1, 2, 1)," # -> False " s05 = MONOTONIC(1, 0, 1)," # -> False - " s06 = MONOTONIC(1, LENGTH('foo'), COUNT(customers))," # -> 3 <= COUNT(customers) - " s07 = MONOTONIC(10, LENGTH('foo'), COUNT(customers))," # False - " s08 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 9)," # -> COUNT(customers) <= 6 - " s09 = MONOTONIC(COUNT(customers), LENGTH('foobar'), 5)," # -> False + " s06 = MONOTONIC(1, LENGTH('foo'), COUNT(cust_info))," # -> 3 <= COUNT(*) + " s07 = MONOTONIC(10, LENGTH('foo'), COUNT(cust_info))," # False + " s08 = MONOTONIC(COUNT(cust_info), LENGTH('foobar'), 9)," # -> COUNT(*) <= 6 + " s09 = MONOTONIC(COUNT(cust_info), LENGTH('foobar'), 5)," # -> False + " s10 = 13 * 7," # -> 91 + " s11 = 42 * LENGTH('')," # -> 0 + " s12 = 42 + LENGTH('fizzbuzz')," # -> 50 + " s13 = 50 - 15," # -> 35 + " s14 = 50 / 2," # -> 25 + " s15 = ABS(COUNT(cust_info) * -0.75)," # -> not simplified + " s16 = DEFAULT_TO(10, COUNT(cust_info))," # -> 10 + " s17 = DEFAULT_TO(None, None, None, COUNT(cust_info))," # -> COUNT(*) + " s18 = DEFAULT_TO(None, None, COUNT(cust_info), None, -1)," # -> COUNT(*) + " s19 
= STARTSWITH('', 'a')," # -> False + " s20 = STARTSWITH('a', '')," # -> True + " s21 = ENDSWITH('', 'a')," # -> False + " s22 = ENDSWITH('a', '')," # -> True + " s23 = CONTAINS('', 'a')," # -> False + " s24 = CONTAINS('a', '')," # -> True + " s25 = ABS(QUANTILE(ABS(INTEGER(cust_info.postal_code)), 0.25))," # -> QUANTILE(ABS(INTEGER(cust_info.postal_code)), 0.25) + " s26 = ABS(MEDIAN(ABS(INTEGER(cust_info.postal_code))))," # -> MEDIAN(ABS(INTEGER(cust_info.postal_code))) + " s27 = ABS(MIN(cust_info.rank))," # -> MIN(cust_info.rank) + " s28 = ABS(MAX(cust_info.rank))," # -> MAX(cust_info.rank) + " s29 = ABS(ANYTHING(cust_info.rsum1))," # -> ANYTHING(cust_info.rsum1) + " s30 = ROUND(ABS(SUM(cust_info.rsum2)), 2)," # -> ROUND(SUM(cust_info.rsum2), 2) + " s31 = ABS(ANYTHING(cust_info.ravg1))," # -> ANYTHING(cust_info.ravg1) + " s32 = ROUND(ABS(SUM(cust_info.ravg2)), 2)," # -> ROUND(SUM(cust_info.ravg2), 2) + " s33 = ABS(ANYTHING(cust_info.rcnt1))," # -> ANYTHING(cust_info.rcnt1) + " s34 = ROUND(ABS(SUM(cust_info.rcnt2)), 2)," # -> ROUND(SUM(cust_info.rcnt2), 2) + " s35 = ABS(ANYTHING(cust_info.rsiz1))," # -> ANYTHING(cust_info.rsiz1) + " s36 = ROUND(ABS(SUM(cust_info.rsiz2)), 2)," # -> ROUND(SUM(cust_info.rsiz2), 2) ")", "Broker", lambda: pd.DataFrame( @@ -1787,6 +1846,33 @@ def get_day_of_week( "s07": [0], "s08": [0], "s09": [0], + "s10": [91], + "s11": [0], + "s12": [50], + "s13": [35], + "s14": [25.0], + "s15": [15.0], + "s16": [10], + "s17": [20], + "s18": [20], + "s19": [0], + "s20": [1], + "s21": [0], + "s22": [1], + "s23": [0], + "s24": [1], + "s25": [10002], + "s26": [54050.5], + "s27": [1], + "s28": [20], + "s29": [1027021], + "s30": [9096414.0], + "s31": [51351.05], + "s32": [802375.94], + "s33": [20], + "s34": [210.0], + "s35": [20], + "s36": [190.0], } ), "simplification_3", diff --git a/tests/test_plan_refsols/simplification_2.txt b/tests/test_plan_refsols/simplification_2.txt index 4e9433c17..f15a3a4fc 100644 --- 
a/tests/test_plan_refsols/simplification_2.txt +++ b/tests/test_plan_refsols/simplification_2.txt @@ -1,2 +1,3 @@ -ROOT(columns=[('s00', True:bool), ('s01', False:bool), ('s02', True:bool), ('s03', False:bool), ('s04', True:bool), ('s05', False:bool), ('s06', None:bool), ('s07', None:bool), ('s08', None:bool), ('s09', None:bool), ('s10', None:bool), ('s11', None:bool), ('s12', False:bool), ('s13', True:bool), ('s14', False:bool), ('s15', False:bool), ('s16', True:bool), ('s17', True:bool), ('s18', True:bool), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', False:bool), ('s25', True:bool), ('s26', True:bool), ('s27', False:bool), ('s28', True:bool), ('s29', False:bool), ('s30', 8:numeric), ('s31', 'alphabet':string), ('s32', 'SOUP':string), ('s33', True:bool), ('s34', False:bool), ('s35', False:bool), ('s36', True:bool), ('s37', 3.0:numeric)], orderings=[]) - EMPTYSINGLETON() +ROOT(columns=[('s00', True:bool), ('s01', False:bool), ('s02', True:bool), ('s03', False:bool), ('s04', True:bool), ('s05', False:bool), ('s06', None:bool), ('s07', None:bool), ('s08', None:bool), ('s09', None:bool), ('s10', None:bool), ('s11', None:bool), ('s12', False:bool), ('s13', True:bool), ('s14', False:bool), ('s15', False:bool), ('s16', True:bool), ('s17', True:bool), ('s18', True:bool), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', False:bool), ('s25', True:bool), ('s26', True:bool), ('s27', False:bool), ('s28', True:bool), ('s29', False:bool), ('s30', 8:numeric), ('s31', 'alphabet':string), ('s32', 'SOUP':string), ('s33', True:bool), ('s34', False:bool), ('s35', False:bool), ('s36', True:bool), ('s37', 3.0:numeric), ('s38', n_rows == None:unknown), ('s39', n_rows <= None:unknown), ('s40', n_rows > None:unknown), ('s41', n_rows > None:unknown), ('s42', n_rows >= None:unknown), ('s43', None:unknown + n_rows), ('s44', n_rows - None:unknown), ('s45', 
None:unknown * n_rows), ('s46', n_rows / None:unknown), ('s47', LIKE(DEFAULT_TO(max_sbCustName, '':string), '%r%':string))], orderings=[]) + AGGREGATE(keys={}, aggregations={'max_sbCustName': MAX(sbCustName), 'n_rows': COUNT()}) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName}) diff --git a/tests/test_plan_refsols/simplification_3.txt b/tests/test_plan_refsols/simplification_3.txt index 8078734b7..9c1130483 100644 --- a/tests/test_plan_refsols/simplification_3.txt +++ b/tests/test_plan_refsols/simplification_3.txt @@ -1,3 +1,4 @@ -ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=main.sbCustomer, columns={}) +ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool), ('s10', 91:numeric), ('s11', 42:numeric * 0:numeric), ('s12', 42:numeric + 8:numeric), ('s13', 35:numeric), ('s14', 25.0:numeric), ('s15', ABS(n_rows * -0.75:numeric)), ('s16', 10:numeric), ('s17', n_rows), ('s18', n_rows), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', True:bool), ('s25', agg_1), ('s26', median_expr_13), ('s27', min_rank), ('s28', max_rank), ('s29', anything_rsum1), ('s30', ROUND(sum_rsum2, 2:numeric)), ('s31', anything_ravg1), ('s32', ROUND(sum_ravg2, 2:numeric)), ('s33', anything_rcnt1), ('s34', ROUND(sum_rcnt2, 2:numeric)), ('s35', anything_rsiz1), ('s36', ROUND(sum_rsiz2, 2:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'agg_1': QUANTILE(ABS(INTEGER(sbCustPostalCode)), 0.25:numeric), 'anything_ravg1': ANYTHING(ravg1), 'anything_rcnt1': 
ANYTHING(rcnt1), 'anything_rsiz1': ANYTHING(rsiz1), 'anything_rsum1': ANYTHING(rsum1), 'max_rank': MAX(rank), 'median_expr_13': MEDIAN(ABS(INTEGER(sbCustPostalCode))), 'min_rank': MIN(rank), 'n_rows': COUNT(), 'sum_ravg2': SUM(ravg2), 'sum_rcnt2': SUM(rcnt2), 'sum_rsiz2': SUM(rsiz2), 'sum_rsum2': SUM(rsum2)}) + PROJECT(columns={'rank': RANKING(args=[], partition=[], order=[(sbCustName):asc_last]), 'ravg1': RELAVG(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[]), 'ravg2': DEFAULT_TO(RELAVG(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[(sbCustName):asc_last], frame=(None, -1)), 0.1:numeric), 'rcnt1': RELCOUNT(args=[INTEGER(sbCustPostalCode)], partition=[], order=[]), 'rcnt2': DEFAULT_TO(RELCOUNT(args=[INTEGER(sbCustPostalCode)], partition=[], order=[(sbCustName):asc_last], cumulative=True), 0.1:numeric), 'rsiz1': RELSIZE(args=[], partition=[], order=[]), 'rsiz2': DEFAULT_TO(RELSIZE(args=[], partition=[], order=[(sbCustName):asc_last], frame=(1, None)), 0.1:numeric), 'rsum1': RELSUM(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[]), 'rsum2': DEFAULT_TO(RELSUM(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[(sbCustName):asc_last], cumulative=True), 0.1:numeric), 'sbCustPostalCode': sbCustPostalCode}) + SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName, 'sbCustPostalCode': sbCustPostalCode}) diff --git a/tests/test_sql_refsols/simplification_2_ansi.sql b/tests/test_sql_refsols/simplification_2_ansi.sql index 237dabec3..963c3a426 100644 --- a/tests/test_sql_refsols/simplification_2_ansi.sql +++ b/tests/test_sql_refsols/simplification_2_ansi.sql @@ -36,6 +36,15 @@ SELECT FALSE AS s34, FALSE AS s35, TRUE AS s36, - 3.0 AS s37 -FROM (VALUES - (NULL)) AS _q_0(_col_0) + 3.0 AS s37, + NULL AS s38, + NULL AS s39, + NULL AS s40, + NULL AS s41, + NULL AS s42, + NULL AS s43, + NULL AS s44, + NULL AS s45, + NULL AS s46, + 
COALESCE(MAX(sbcustname), '') LIKE '%r%' AS s47 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_2_sqlite.sql b/tests/test_sql_refsols/simplification_2_sqlite.sql index b5d5d71f5..963c3a426 100644 --- a/tests/test_sql_refsols/simplification_2_sqlite.sql +++ b/tests/test_sql_refsols/simplification_2_sqlite.sql @@ -36,6 +36,15 @@ SELECT FALSE AS s34, FALSE AS s35, TRUE AS s36, - 3.0 AS s37 -FROM (VALUES - (NULL)) AS _q_0 + 3.0 AS s37, + NULL AS s38, + NULL AS s39, + NULL AS s40, + NULL AS s41, + NULL AS s42, + NULL AS s43, + NULL AS s44, + NULL AS s45, + NULL AS s46, + COALESCE(MAX(sbcustname), '') LIKE '%r%' AS s47 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_3_ansi.sql b/tests/test_sql_refsols/simplification_3_ansi.sql index 3b49cd41b..c1f6211b7 100644 --- a/tests/test_sql_refsols/simplification_3_ansi.sql +++ b/tests/test_sql_refsols/simplification_3_ansi.sql @@ -1,3 +1,29 @@ +WITH _t1 AS ( + SELECT + ROW_NUMBER() OVER (ORDER BY sbcustname NULLS LAST) AS rank, + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS ravg1, + COALESCE( + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 0.1 + ) AS ravg2, + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER () AS rcnt1, + COALESCE( + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rcnt2, + COUNT(*) OVER () AS rsiz1, + COALESCE( + COUNT(*) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING), + 0.1 + ) AS rsiz2, + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS rsum1, + COALESCE( + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rsum2, + sbcustpostalcode + FROM main.sbcustomer +) SELECT TRUE AS s00, TRUE AS s01, @@ 
-8,5 +34,33 @@ SELECT COUNT(*) >= 3 AS s06, FALSE AS s07, COUNT(*) <= 6 AS s08, - FALSE AS s09 -FROM main.sbcustomer + FALSE AS s09, + 91 AS s10, + 0 AS s11, + 50 AS s12, + 35 AS s13, + 25.0 AS s14, + ABS(COUNT(*) * -0.75) AS s15, + 10 AS s16, + COUNT(*) AS s17, + COUNT(*) AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + TRUE AS s24, + PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + ABS(CAST(sbcustpostalcode AS BIGINT)) NULLS LAST) AS s25, + MEDIAN(ABS(CAST(sbcustpostalcode AS BIGINT))) AS s26, + MIN(rank) AS s27, + MAX(rank) AS s28, + ANY_VALUE(rsum1) AS s29, + ROUND(SUM(rsum2), 2) AS s30, + ANY_VALUE(ravg1) AS s31, + ROUND(SUM(ravg2), 2) AS s32, + ANY_VALUE(rcnt1) AS s33, + ROUND(SUM(rcnt2), 2) AS s34, + ANY_VALUE(rsiz1) AS s35, + ROUND(SUM(rsiz2), 2) AS s36 +FROM _t1 diff --git a/tests/test_sql_refsols/simplification_3_sqlite.sql b/tests/test_sql_refsols/simplification_3_sqlite.sql index 3b49cd41b..a31640d1b 100644 --- a/tests/test_sql_refsols/simplification_3_sqlite.sql +++ b/tests/test_sql_refsols/simplification_3_sqlite.sql @@ -1,3 +1,59 @@ +WITH _t2 AS ( + SELECT + ABS(CAST(sbcustpostalcode AS INTEGER)) AS expr_13, + ROW_NUMBER() OVER (ORDER BY sbcustname) AS rank, + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER () AS ravg1, + COALESCE( + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 0.1 + ) AS ravg2, + COUNT(CAST(sbcustpostalcode AS INTEGER)) OVER () AS rcnt1, + COALESCE( + COUNT(CAST(sbcustpostalcode AS INTEGER)) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rcnt2, + COUNT(*) OVER () AS rsiz1, + COALESCE( + COUNT(*) OVER (ORDER BY sbcustname ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING), + 0.1 + ) AS rsiz2, + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER () AS rsum1, + COALESCE( + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0))) OVER (ORDER BY 
sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rsum2 + FROM main.sbcustomer +), _t1 AS ( + SELECT + CASE + WHEN CAST(0.75 * COUNT(expr_13) OVER () AS INTEGER) < ROW_NUMBER() OVER (ORDER BY expr_13 DESC) + THEN expr_13 + ELSE NULL + END AS expr_15, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - 1.0 + ) - ( + CAST(( + COUNT(expr_13) OVER () - 1.0 + ) AS REAL) / 2.0 + ) + ) < 1.0 + THEN expr_13 + ELSE NULL + END AS expr_16, + rank, + ravg1, + ravg2, + rcnt1, + rcnt2, + rsiz1, + rsiz2, + rsum1, + rsum2 + FROM _t2 +) SELECT TRUE AS s00, TRUE AS s01, @@ -8,5 +64,32 @@ SELECT COUNT(*) >= 3 AS s06, FALSE AS s07, COUNT(*) <= 6 AS s08, - FALSE AS s09 -FROM main.sbcustomer + FALSE AS s09, + 91 AS s10, + 0 AS s11, + 50 AS s12, + 35 AS s13, + 25.0 AS s14, + ABS(COUNT(*) * -0.75) AS s15, + 10 AS s16, + COUNT(*) AS s17, + COUNT(*) AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + TRUE AS s24, + MAX(expr_15) AS s25, + AVG(expr_16) AS s26, + MIN(rank) AS s27, + MAX(rank) AS s28, + MAX(rsum1) AS s29, + ROUND(SUM(rsum2), 2) AS s30, + MAX(ravg1) AS s31, + ROUND(SUM(ravg2), 2) AS s32, + MAX(rcnt1) AS s33, + ROUND(SUM(rcnt2), 2) AS s34, + MAX(rsiz1) AS s35, + ROUND(SUM(rsiz2), 2) AS s36 +FROM _t1 From 02c24bd54e79b63ef8acc9b265deb7e2938968aa Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 31 Jul 2025 00:41:53 -0400 Subject: [PATCH 76/97] Revisions --- .../conversion/relational_simplification.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index a1e20c7df..af2d88137 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -1,5 +1,9 @@ """ -Logic used to simplify relational expressions in a relational node. +Logic used to simplify relational expressions in a relational node. 
A visitor +is used on the relational nodes to first simplify the child subtrees, then a +relational shuttle is run on the expressions of the current node to simplify +them, using the input predicates from the child nodes, and also infer the +predicates of the simplified expressions. """ __all__ = ["simplify_expressions"] @@ -91,7 +95,7 @@ def union(predicates: list["PredicateSet"]) -> "PredicateSet": Computes the union of a list of predicate sets. """ result: PredicateSet = PredicateSet() - for pred in predicates[1:]: + for pred in predicates: result = result | pred return result @@ -304,7 +308,7 @@ def simplify_function_call( union_set: PredicateSet = PredicateSet.union(arg_predicates) intersect_set: PredicateSet = PredicateSet.intersect(arg_predicates) - # If the call has null propagating rules, all of hte arguments are + # If the call has null propagating rules, all of the arguments are # non-null, the output is guaranteed to be non-null. if expr.op in NULL_PROPAGATING_OPS: if intersect_set.not_null: @@ -411,7 +415,7 @@ def simplify_function_call( if PredicateSet.union(new_predicates).not_null: output_predicates.not_null = True - # ABS(x) -> x if x is positive or non-negative. At hte very least, we + # ABS(x) -> x if x is positive or non-negative. At the very least, we # know it is always non-negative. case pydop.ABS: if arg_predicates[0].not_negative or arg_predicates[0].positive: @@ -434,7 +438,9 @@ def simplify_function_call( # LOWER, UPPER, STARTSWITH, ENDSWITH, and CONTAINS can be constant # folded if the inputs are string literals. The boolean-returning - # operators are always non-negative. + # operators are always non-negative. Most of cases do not set + # predicates because there are no predicates to infer, beyond those + # already accounted for with NULL_PROPAGATING_OPS. 
case pydop.LOWER: if isinstance(expr.inputs[0], LiteralExpression) and isinstance( expr.inputs[0].value, str @@ -567,7 +573,7 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(False, expr.data_type) - if all( + elif all( isinstance(arg, LiteralExpression) and arg.value not in [0, False, None] for arg in expr.inputs @@ -584,7 +590,7 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - if all( + elif all( isinstance(arg, LiteralExpression) and arg.value in [0, False, None] for arg in expr.inputs ): @@ -603,7 +609,6 @@ def simplify_function_call( ) output_predicates.positive = not bool(expr.inputs[0].value) output_predicates.not_negative = True - pass case pydop.EQU | pydop.NEQ | pydop.GEQ | pydop.GRT | pydop.LET | pydop.LEQ: match (expr.inputs[0], expr.op, expr.inputs[1]): @@ -669,6 +674,7 @@ def simplify_function_call( output_expr = LiteralExpression(x >= y, expr.data_type) # type: ignore case _: + # All other cases remain non-simplified. pass output_predicates.not_negative = True @@ -727,6 +733,9 @@ def simplify_function_call( output_predicates |= arg_predicates[0] & PredicateSet( not_null=True, not_negative=True ) + case _: + # All other operators remain non-simplified. + pass self.stack.append(output_predicates) return output_expr @@ -793,6 +802,10 @@ def simplify_window_call( output_predicates.positive = True output_predicates.not_negative = True + case _: + # All other operators remain non-simplified. 
+ pass + self.stack.append(output_predicates) return output_expr From 2c65773518a180471fe4b0c80461cd51dc579a55 Mon Sep 17 00:00:00 2001 From: knassre-bodo <105652923+knassre-bodo@users.noreply.github.com> Date: Thu, 31 Jul 2025 00:42:29 -0400 Subject: [PATCH 77/97] Apply suggestions from code review Co-authored-by: Hadia Ahmed --- pydough/conversion/relational_simplification.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index af2d88137..3ca3a19cb 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -211,7 +211,7 @@ def input_predicates(self, value: dict[RelationalExpression, PredicateSet]) -> N @property def no_group_aggregate(self) -> bool: """ - Returns whether the shuttle currently a handling no-group-aggregate. + Returns whether the shuttle is currently handling a no-group-aggregate. """ return self._no_group_aggregate @@ -321,9 +321,9 @@ def simplify_function_call( output_predicates.not_null = True output_predicates.not_negative = True - # The output if COUNT(*) is positive if unless doing a + # The output of COUNT(*) is positive unless doing a # no-groupby aggregation. Same goes for calling COUNT or - # NDISTINCT ona non-null column. + # NDISTINCT on a non-null column. if not no_group_aggregate: if len(expr.inputs) == 0 or arg_predicates[0].not_null: output_predicates.positive = True @@ -339,7 +339,7 @@ def simplify_function_call( output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - # All of these operators are non-null aor non-negative if their + # All of these operators are non-null or non-negative if their # first argument is. 
case ( pydop.SUM From cc12363480defa6441f39e8e21669ba47361e47f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 31 Jul 2025 00:43:36 -0400 Subject: [PATCH 78/97] edit --- pydough/conversion/relational_simplification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index af2d88137..5d7ce9aef 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -231,7 +231,7 @@ def visit_literal_expression( output_predicates: PredicateSet = PredicateSet() if literal_expression.value is not None: output_predicates.not_null = True - if isinstance(literal_expression.value, (int, float)): + if isinstance(literal_expression.value, (int, float, bool)): if literal_expression.value >= 0: output_predicates.not_negative = True if literal_expression.value > 0: From 6ec13f1df0266596e74a34c60f6305e1e7f2c6e9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 10:45:47 -0400 Subject: [PATCH 79/97] [RUN CI] --- tests/test_sql_refsols/defog_broker_adv8_ansi.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql index d5d15e56c..c130ba30f 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql @@ -7,4 +7,4 @@ JOIN main.sbcustomer AS sbcustomer AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE sbtransaction.sbtxdatetime < DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()) - AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -1, 'WEEK') + AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -7, 'DAY') From 9344c9a13f46f61bd6b60801cdcd44468b4f6049 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 10:58:51 -0400 Subject: [PATCH 80/97] Fixing SQL test [RUN 
CI] --- tests/test_sql_refsols/defog_broker_adv8_ansi.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql index c130ba30f..d5d15e56c 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_ansi.sql @@ -7,4 +7,4 @@ JOIN main.sbcustomer AS sbcustomer AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE sbtransaction.sbtxdatetime < DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()) - AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -7, 'DAY') + AND sbtransaction.sbtxdatetime >= DATE_ADD(DATE_TRUNC('WEEK', CURRENT_TIMESTAMP()), -1, 'WEEK') From 59850bd72af909f11ed55074431afb34eda402b5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 11:10:12 -0400 Subject: [PATCH 81/97] [RUN CI] From f460cdaa0d6eebd96eaf4ea81a9f78e249ba28ee Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 1 Aug 2025 17:00:41 -0400 Subject: [PATCH 82/97] Revision --- tests/test_plan_refsols/smoke_c.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_plan_refsols/smoke_c.txt b/tests/test_plan_refsols/smoke_c.txt index 33bd737f0..c9307612d 100644 --- a/tests/test_plan_refsols/smoke_c.txt +++ b/tests/test_plan_refsols/smoke_c.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('a', n_rows), ('b', DEFAULT_TO(sum_expr_18, 0:numeric)), ('c', DEFAULT_TO(sum_expr_25, 0:numeric)), ('d', ndistinct_c_mktsegment), ('e', ROUND(avg_expr_26, 4:numeric)), ('f', min_c_acctbal), ('g', max_c_acctbal), ('h', anything_expr_27), ('i', count_expr_21), ('j', CEIL(population_variance_expr_21)), ('k', ROUND(sample_variance_expr_19, 4:numeric)), ('l', FLOOR(population_std_expr_19)), ('m', ROUND(sample_std_expr_21, 4:numeric)), ('n', ROUND(avg_expr_22, 2:numeric)), ('o', sum_expr_23), ('p', sum_expr_24), ('q', agg_16), ('r', median_c_acctbal)], orderings=[]) - AGGREGATE(keys={}, 
aggregations={'agg_16': QUANTILE(c_acctbal, 0.2:numeric), 'anything_expr_27': ANYTHING(SLICE(c_name, None:unknown, 1:numeric, None:unknown)), 'avg_expr_22': AVG(DEFAULT_TO(KEEP_IF(c_acctbal, c_acctbal > 0:numeric), 0:numeric)), 'avg_expr_26': AVG(ABS(c_acctbal)), 'count_expr_21': COUNT(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'max_c_acctbal': MAX(c_acctbal), 'median_c_acctbal': MEDIAN(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'ndistinct_c_mktsegment': NDISTINCT(c_mktsegment), 'population_std_expr_19': POPULATION_STD(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'population_variance_expr_21': POPULATION_VARIANCE(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_std_expr_21': SAMPLE_STD(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_variance_expr_19': SAMPLE_VARIANCE(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'sum_expr_18': SUM(FLOOR(c_acctbal)), 'sum_expr_23': SUM(PRESENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_24': SUM(ABSENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_25': SUM(CEIL(c_acctbal))}) +ROOT(columns=[('a', n_rows), ('b', DEFAULT_TO(sum_expr_18, 0:numeric)), ('c', DEFAULT_TO(sum_expr_25, 0:numeric)), ('d', ndistinct_c_mktsegment), ('e', ROUND(avg_expr_26, 4:numeric)), ('f', min_c_acctbal), ('g', max_c_acctbal), ('h', anything_expr_27), ('i', count_expr_21), ('j', CEIL(population_var_expr_21)), ('k', ROUND(sample_var_expr_19, 4:numeric)), ('l', FLOOR(population_std_expr_19)), ('m', ROUND(sample_std_expr_21, 4:numeric)), ('n', ROUND(avg_expr_22, 2:numeric)), ('o', sum_expr_23), ('p', sum_expr_24), ('q', agg_16), ('r', median_c_acctbal)], orderings=[]) + AGGREGATE(keys={}, aggregations={'agg_16': QUANTILE(c_acctbal, 0.2:numeric), 'anything_expr_27': ANYTHING(SLICE(c_name, None:unknown, 1:numeric, None:unknown)), 'avg_expr_22': AVG(DEFAULT_TO(KEEP_IF(c_acctbal, c_acctbal > 0:numeric), 0:numeric)), 'avg_expr_26': AVG(ABS(c_acctbal)), 'count_expr_21': COUNT(KEEP_IF(c_acctbal, c_acctbal > 
0:numeric)), 'max_c_acctbal': MAX(c_acctbal), 'median_c_acctbal': MEDIAN(c_acctbal), 'min_c_acctbal': MIN(c_acctbal), 'n_rows': COUNT(), 'ndistinct_c_mktsegment': NDISTINCT(c_mktsegment), 'population_std_expr_19': POPULATION_STD(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'population_var_expr_21': POPULATION_VAR(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_std_expr_21': SAMPLE_STD(KEEP_IF(c_acctbal, c_acctbal > 0:numeric)), 'sample_var_expr_19': SAMPLE_VAR(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'sum_expr_18': SUM(FLOOR(c_acctbal)), 'sum_expr_23': SUM(PRESENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_24': SUM(ABSENT(KEEP_IF(c_acctbal, c_acctbal > 1000:numeric))), 'sum_expr_25': SUM(CEIL(c_acctbal))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) From 0fa39c781de76f374d59910f44d86bbf5013c15c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 7 Aug 2025 09:42:33 -0400 Subject: [PATCH 83/97] Removing dead comment --- tests/test_pipeline_tpch_custom.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 1680707c1..9bd5b3bf7 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -3265,8 +3265,6 @@ def test_pipeline_e2e_tpch_custom( ), id="bad_cross_6", ), - # TODO: fix the error handling here to give a proper error message - # (currently fails in hybrid due to an assertion) pytest.param( bad_cross_7, None, From 4d899b824d751e396e911917d12b3574f4748d64 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 18 Aug 2025 10:28:25 -0400 Subject: [PATCH 84/97] Adding more comments/docstrings --- pydough/unqualified/unqualified_node.py | 39 +++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/pydough/unqualified/unqualified_node.py b/pydough/unqualified/unqualified_node.py index 4dc1f57cc..40d8ae6ea 100644 --- 
a/pydough/unqualified/unqualified_node.py +++ b/pydough/unqualified/unqualified_node.py @@ -893,7 +893,19 @@ def call_function_operator( **kwargs, ) -> UnqualifiedNode: """ - TODO + Creates an invocation of a PyDough (non-window) function operator on the + provided operands and keyword arguments. + + Args: + `operator`: the function operator being called. + `operands`: the list of unqualified nodes being passed as arguments. + `kwargs`: the keyword arguments being passed to the function. These are + used for operators that branch on a keyword, such as variance and + standard deviation which have different sub-operators for population + versus sample. + + Returns: + The unqualified node representing the function call. """ # Check if this is a keyword branching operator @@ -923,25 +935,48 @@ def call_window_operator( operator: pydop.ExpressionWindowOperator, operands: list[UnqualifiedNode], **kwargs ) -> UnqualifiedNode: """ - TODO + Creates an invocation of a PyDough window function operator on the + provided operands and keyword arguments. + + Args: + `operator`: the window function operator being called. + `operands`: the list of unqualified nodes being passed as arguments. + `kwargs`: the keyword arguments being passed to the window function. + These may include `by`, `per`, `n_buckets`, `allow_ties`, `dense`, + `n`, etc. depending on the operator. + + Returns: + The unqualified node representing the window function call. 
""" match operator: case pydop.PERCENTILE: + # Percentile has an optional `n_buckets` argument, defaulting to 100 is_positive_int.verify(kwargs.get("n_buckets", 100), "`n_buckets` argument") case pydop.RANKING: + # Ranking has optional `allow_ties` and `dense` boolean arguments, + # both defaulting to False is_bool.verify(kwargs.get("allow_ties", False), "`allow_ties` argument") is_bool.verify(kwargs.get("dense", False), "`dense` argument") case pydop.PREV | pydop.NEXT: + # PREV/NEXT have an optional `n` argument, defaulting to 1, which + # could also be a positional argument. is_integer.verify(kwargs.get("n", 1), "`n` argument") if len(operands) > 1: is_integer.verify(operands[1], "`n` argument") + # Extract the `by` argument to the window function, if it has one, and + # verify that it is valid for to have one given the operator and other + # keyword arguments (e.g. cumulative, frame). by: Iterable[UnqualifiedNode] = get_by_arg(kwargs, operator) + + # Any window function can have an optional `per` argument saying which + # ancestor the window function is being computed with regards to. 
class JoinAggregateTransposeShuttle(RelationalShuttle):
    """
    Relational shuttle that looks for a Join whose left input is an Aggregate
    and, when it is safe to do so, rewrites `Join(Aggregate(X), Y)` into
    `Aggregate(Join(X, Y))` so that the join happens before the aggregation.
    Joins that do not match the pattern, or that fail any of the safety checks
    in `join_aggregate_transpose`, are left as they are.
    """

    def __init__(self):
        # Reusable finder for collecting the column references that appear
        # inside the join condition and the join's output expressions.
        self.finder: ColumnReferenceFinder = ColumnReferenceFinder()

    def reset(self):
        """
        Clears the state of the column reference finder.
        """
        self.finder.reset()

    def visit_join(self, node: Join) -> RelationalNode:
        """
        Attempts the transpose when the left input of the join is an
        Aggregate, then continues visiting the inputs of whichever node the
        attempt returned (the new Aggregate, or the original Join if the
        transpose was not possible). Other joins use the default behavior.
        """
        left_input: RelationalNode = node.inputs[0]
        if isinstance(left_input, Aggregate):
            return self.generic_visit_inputs(
                self.join_aggregate_transpose(node, left_input)
            )
        return super().visit_join(node)

    def join_aggregate_transpose(
        self, join: Join, aggregate: Aggregate
    ) -> RelationalNode:
        """
        Transposes a Join above an Aggregate into an Aggregate above a Join,
        when possible.

        Args:
            `join`: the Join node above the Aggregate.
            `aggregate`: the Aggregate node that is the left input to the Join.

        Returns:
            The new RelationalNode tree with the Join and Aggregate transposed,
            or the original Join (unmodified) if the transpose is not possible.
        """
        left_alias = join.default_input_aliases[0]
        right_alias = join.default_input_aliases[1]

        # Only inner joins and semi joins are handled. The join must also be
        # singular, unless every aggregation is one whose value does not
        # change when its input rows are duplicated.
        # NOTE(review): in the plural case the number of output rows of the
        # join may still differ after the transpose -- confirm that callers
        # only rely on the aggregated values in that situation.
        aggs_allow_plural: bool = all(
            call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT)
            for call in aggregate.aggregations.values()
        )
        if join.join_type not in (JoinType.INNER, JoinType.SEMI):
            return join
        if not (join.cardinality.singular or aggs_allow_plural):
            return join

        # Find all of the columns used in the join condition that come from
        # the left-hand side of the join.
        self.finder.reset()
        join.condition.accept(self.finder)
        lhs_condition_columns: set[ColumnReference] = {
            col
            for col in self.finder.get_column_references()
            if col.input_name == left_alias
        }

        # There must be at least one left-hand side condition column, and each
        # one must be a grouping key of the aggregate. Because the condition
        # is reused verbatim against the input of the aggregate, each such key
        # must also be a same-named pass-through column of that input;
        # otherwise the reused condition would reference a column that does
        # not exist (or means something else) below the aggregate.
        if len(lhs_condition_columns) == 0:
            return join
        for col in lhs_condition_columns:
            if col.name not in aggregate.keys:
                return join
            condition_key: RelationalExpression = aggregate.keys[col.name]
            if not (
                isinstance(condition_key, ColumnReference)
                and condition_key.name == col.name
            ):
                return join

        new_join_columns: dict[str, RelationalExpression] = {}
        new_key_columns: dict[str, RelationalExpression] = {}
        new_aggregate_columns: dict[str, CallExpression] = {}
        # Names already claimed by an output column of the new aggregate.
        used_column_names: set[str] = set()

        # Classify every output column of the original join.
        for col_name, col_expr in join.columns.items():
            self.finder.reset()
            col_expr.accept(self.finder)
            if all(
                ref.input_name == right_alias
                for ref in self.finder.get_column_references()
            ):
                # Expressions that only use the right-hand side are computed
                # by the new join, then passed through the new aggregate with
                # ANYTHING since they are not grouping keys.
                new_join_columns[col_name] = col_expr
                new_aggregate_columns[col_name] = CallExpression(
                    pydop.ANYTHING,
                    col_expr.data_type,
                    [ColumnReference(col_name, col_expr.data_type)],
                )
                used_column_names.add(col_name)
            elif not (
                isinstance(col_expr, ColumnReference)
                and col_expr.input_name == left_alias
            ):
                # Anything else must be a plain reference to a column of the
                # original aggregate.
                return join
            elif col_expr.name != col_name:
                # The new aggregate exposes keys and aggregations under their
                # original names, so a renamed left-hand column would vanish
                # from the output schema.
                return join

        # Every grouping key is evaluated in the new join (against the left
        # input) and then used as a grouping key of the new aggregate.
        for key_name, key_expr in aggregate.keys.items():
            if key_name in used_column_names:
                # Name collision with a right-hand side column: give up
                # instead of producing an ambiguous schema.
                return join
            new_join_columns[key_name] = add_input_name(key_expr, left_alias)
            # The type must come from the key itself, not from a leftover
            # loop variable of the join-column loop above.
            new_key_columns[key_name] = ColumnReference(key_name, key_expr.data_type)
            used_column_names.add(key_name)

        # Every aggregation is kept as-is on top of the new join, which must
        # therefore expose each aggregation input under its original name.
        for agg_name, agg_expr in aggregate.aggregations.items():
            if agg_name in used_column_names:
                return join
            for input_expr in agg_expr.inputs:
                if not isinstance(input_expr, ColumnReference):
                    return join
                if input_expr.name in new_join_columns:
                    # The name is already taken in the new join (by a key, a
                    # right-hand column or another aggregation input).
                    return join
                new_join_columns[input_expr.name] = add_input_name(
                    input_expr, left_alias
                )
            new_aggregate_columns[agg_name] = agg_expr
            used_column_names.add(agg_name)

        new_join: Join = Join(
            inputs=[aggregate.inputs[0], join.inputs[1]],
            condition=join.condition,
            columns=new_join_columns,
            join_type=join.join_type,
            cardinality=join.cardinality,
        )

        return Aggregate(
            input=new_join, keys=new_key_columns, aggregations=new_aggregate_columns
        )


def pull_joins_after_aggregates(node: RelationalRoot) -> RelationalNode:
    """
    Runs the join-aggregate transpose over an entire relational tree.

    Args:
        `node`: the root of the relational tree to transform.

    Returns:
        The node returned by running a `JoinAggregateTransposeShuttle` on
        `node`.
    """
    shuttle: JoinAggregateTransposeShuttle = JoinAggregateTransposeShuttle()
    return node.accept_shuttle(shuttle)
This diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index c35fff43b..8b72513f1 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -8,8 +8,8 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name' AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'c_name': ANYTHING(c_name), 'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 42ae08339..20f4702e6 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -5,17 +5,16 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', D FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) 
== 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, 
columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': ANYTHING(s_acctbal), 'sum_n_rows': COUNT(), 'sum_sum_agg_5': SUM(agg_5), 'sum_sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index ec4994e6d..24073e17b 100644 --- 
a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -12,8 +12,8 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT(), 'p_type': ANYTHING(p_type)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t0.l_partkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) @@ -21,4 +21,4 @@ 
ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index f8c87d703..b243e7f6e 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,11 +1,11 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_sum_sum_agg_0), ('total_value', DEFAULT_TO(sum_sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': year}, aggregations={'sum_sum_sum_sum_agg_0': SUM(sum_sum_sum_agg_0), 'sum_sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 
's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice), 'supplier_nation': ANYTHING(supplier_nation)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t0.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) + AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'s_nationkey': ANYTHING(s_nationkey), 'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) + AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, 
aggregations={'ps_suppkey': ANYTHING(ps_suppkey), 'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_suppkey': t1.ps_suppkey, 'sum_l_extendedprice': t0.sum_l_extendedprice}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) @@ -14,6 +14,6 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', s SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index ea1267de4..4d9bd41a7 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -3,26 +3,26 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares_1, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': ANYTHING(cust_avg_shares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': 
t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares_1': ANYTHING(cust_tick_avg_shares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & 
t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'cus_tick_typ_avg_shares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index b58741ac6..9d772a653 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,14 +1,14 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'sbDpClose': ANYTHING(sbDpClose), 'sbTickerType': ANYTHING(sbTickerType)}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t0.sbDpTickerId, 'sbTickerType': t1.sbTickerType}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpTickerId': t1.sbDpTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + 
SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 5aea3ab12..4d92ec389 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -2,11 +2,11 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_ JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'n_ticker_type_trans': ANYTHING(n_ticker_type_trans), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index 052d1893e..5f01eb40f 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,10 +1,10 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'release_year': 
t0.release_year, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) - AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows_1)}) + AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows_1': COUNT(), 'pr_release': ANYTHING(pr_release)}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'pr_release': t1.pr_release}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) - SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) + SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index bda581395..5419a1b54 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -3,12 +3,12 @@ WITH _s1 AS ( p_partkey, p_type FROM tpch.part -), _s10 AS ( +), _t4 AS ( SELECT COUNT(*) AS n_rows, + MAX(_s11.p_type) AS p_type, customer.c_custkey, customer.c_nationkey, - lineitem.l_partkey, orders.o_orderpriority FROM tpch.customer AS customer JOIN tpch.orders AS orders @@ -31,6 +31,8 @@ WITH _s1 AS ( END = 1 AND CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1997 AND lineitem.l_orderkey = 
orders.o_orderkey + JOIN _s1 AS _s11 + ON _s11.p_partkey = lineitem.l_partkey GROUP BY customer.c_custkey, customer.c_nationkey, @@ -38,19 +40,17 @@ WITH _s1 AS ( orders.o_orderpriority ), _t3 AS ( SELECT - SUM(_s10.n_rows) AS sum_n_rows, - _s10.c_custkey, - _s10.c_nationkey, - _s10.o_orderpriority, - _s11.p_type - FROM _s10 AS _s10 - JOIN _s1 AS _s11 - ON _s10.l_partkey = _s11.p_partkey + SUM(n_rows) AS sum_n_rows, + c_custkey, + c_nationkey, + o_orderpriority, + p_type + FROM _t4 GROUP BY - _s10.c_custkey, - _s10.c_nationkey, - _s10.o_orderpriority, - _s11.p_type + c_custkey, + c_nationkey, + o_orderpriority, + p_type ) SELECT COUNT(*) AS n diff --git a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql index 1c7306b78..c3d74d72d 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql @@ -1,34 +1,35 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction + ANY_VALUE(sbcustomer.sbcuststate) AS sbcuststate, + ANY_VALUE(sbticker.sbtickertype) AS sbtickertype, + sbtransaction.sbtxcustid + FROM main.sbtransaction AS sbtransaction + JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + JOIN main.sbcustomer AS sbcustomer + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY - sbtxcustid, - sbtxtickerid -), _s2 AS ( + sbtransaction.sbtxcustid, + sbtransaction.sbtxtickerid +), _t1 AS ( SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid + SUM(num_transactions) AS num_transactions, + ANY_VALUE(sbcuststate) AS sbcuststate, + sbtickertype + FROM _t2 GROUP BY - sbticker.sbtickertype, - _s0.sbtxcustid + sbtickertype, + sbtxcustid ) SELECT - sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - 
SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 -JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + sbcuststate AS state, + sbtickertype AS ticker_type, + SUM(num_transactions) AS num_transactions +FROM _t1 GROUP BY - sbcustomer.sbcuststate, - _s2.sbtickertype + sbcuststate, + sbtickertype ORDER BY num_transactions DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql index 1c7306b78..adf243d02 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql @@ -1,34 +1,35 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction + MAX(sbcustomer.sbcuststate) AS sbcuststate, + MAX(sbticker.sbtickertype) AS sbtickertype, + sbtransaction.sbtxcustid + FROM main.sbtransaction AS sbtransaction + JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + JOIN main.sbcustomer AS sbcustomer + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY - sbtxcustid, - sbtxtickerid -), _s2 AS ( + sbtransaction.sbtxcustid, + sbtransaction.sbtxtickerid +), _t1 AS ( SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid + SUM(num_transactions) AS num_transactions, + MAX(sbcuststate) AS sbcuststate, + sbtickertype + FROM _t2 GROUP BY - sbticker.sbtickertype, - _s0.sbtxcustid + sbtickertype, + sbtxcustid ) SELECT - sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 -JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + sbcuststate AS state, + sbtickertype AS ticker_type, + SUM(num_transactions) AS num_transactions +FROM _t1 GROUP BY - sbcustomer.sbcuststate, - 
_s2.sbtickertype + sbcuststate, + sbtickertype ORDER BY num_transactions DESC LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index 6ac06680b..373ecd582 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -1,25 +1,25 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, - SUM(sale_price) AS sum_sale_price, - customer_id - FROM main.sales + ANY_VALUE(customers.state) AS state, + SUM(sales.sale_price) AS sum_sale_price, + DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter + FROM main.sales AS sales + JOIN main.customers AS customers + ON customers._id = sales.customer_id WHERE - EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 GROUP BY - DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)), - customer_id + sales.customer_id, + DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) ), _t1 AS ( SELECT - SUM(_s0.sum_sale_price) AS sum_sum_sale_price, - _s0.quarter, - customers.state - FROM _s0 AS _s0 - JOIN main.customers AS customers - ON _s0.customer_id = customers._id + SUM(sum_sale_price) AS sum_sum_sale_price, + quarter, + state + FROM _t2 GROUP BY - _s0.quarter, - customers.state + quarter, + state ) SELECT quarter, diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index 96ad10d92..65ad5eed8 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -1,41 +1,41 @@ -WITH _s0 AS ( +WITH _t2 AS ( SELECT + MAX(customers.state) AS state, + SUM(sales.sale_price) AS sum_sale_price, DATE( - sale_date, + sales.sale_date, 'start of month', '-' || CAST(( ( - CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 + 
CAST(STRFTIME('%m', DATETIME(sales.sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ) AS quarter, - SUM(sale_price) AS sum_sale_price, - customer_id - FROM main.sales + ) AS quarter + FROM main.sales AS sales + JOIN main.customers AS customers + ON customers._id = sales.customer_id WHERE - CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 + CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 GROUP BY + sales.customer_id, DATE( - sale_date, + sales.sale_date, 'start of month', '-' || CAST(( ( - CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 + CAST(STRFTIME('%m', DATETIME(sales.sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' - ), - customer_id + ) ), _t1 AS ( SELECT - SUM(_s0.sum_sale_price) AS sum_sum_sale_price, - _s0.quarter, - customers.state - FROM _s0 AS _s0 - JOIN main.customers AS customers - ON _s0.customer_id = customers._id + SUM(sum_sale_price) AS sum_sum_sale_price, + quarter, + state + FROM _t2 GROUP BY - _s0.quarter, - customers.state + quarter, + state ) SELECT quarter, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index c00e952eb..592b7dd15 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -1,24 +1,24 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - de_product_id - FROM main.devices - GROUP BY - de_product_id -), _s1 AS ( +WITH _s1 AS ( SELECT pr_id, pr_release FROM main.products -), _s6 AS ( +), _t1 AS ( SELECT - EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS release_year, - SUM(_s0.n_rows) AS sum_n_rows - FROM _s0 AS _s0 + COUNT(*) AS n_rows_1, + ANY_VALUE(_s1.pr_release) AS pr_release + FROM main.devices AS devices JOIN _s1 AS _s1 - ON _s0.de_product_id = _s1.pr_id + ON _s1.pr_id = devices.de_product_id + GROUP BY + devices.de_product_id +), _s6 AS ( + SELECT + 
EXTRACT(YEAR FROM CAST(pr_release AS DATETIME)) AS release_year, + SUM(n_rows_1) AS sum_n_rows + FROM _t1 GROUP BY - EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) + EXTRACT(YEAR FROM CAST(pr_release AS DATETIME)) ), _s7 AS ( SELECT COUNT(*) AS n_rows, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index df94defe5..3d44bc799 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -1,24 +1,24 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - de_product_id - FROM main.devices - GROUP BY - de_product_id -), _s1 AS ( +WITH _s1 AS ( SELECT pr_id, pr_release FROM main.products -), _s6 AS ( +), _t1 AS ( SELECT - CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS release_year, - SUM(_s0.n_rows) AS sum_n_rows - FROM _s0 AS _s0 + COUNT(*) AS n_rows_1, + MAX(_s1.pr_release) AS pr_release + FROM main.devices AS devices JOIN _s1 AS _s1 - ON _s0.de_product_id = _s1.pr_id + ON _s1.pr_id = devices.de_product_id + GROUP BY + devices.de_product_id +), _s6 AS ( + SELECT + CAST(STRFTIME('%Y', pr_release) AS INTEGER) AS release_year, + SUM(n_rows_1) AS sum_n_rows + FROM _t1 GROUP BY - CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) + CAST(STRFTIME('%Y', pr_release) AS INTEGER) ), _s7 AS ( SELECT COUNT(*) AS n_rows, From 7bf3268f0151ce51f87e34cc1ead8bb92e28a512 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 20 Aug 2025 20:11:23 -0400 Subject: [PATCH 86/97] WIP fixing column handling triple_partition + other bugs --- pydough/conversion/join_agg_transpose.py | 55 ++++++++++++--- pydough/conversion/relational_converter.py | 2 + .../aggregation_analytics_2.txt | 6 +- .../aggregation_analytics_3.txt | 6 +- tests/test_plan_refsols/correl_30.txt | 26 +++---- .../multi_partition_access_2.txt | 28 ++++---- .../multi_partition_access_4.txt | 8 +-- 
.../multi_partition_access_6.txt | 52 +++++++------- tests/test_sql_refsols/correl_30_sqlite.sql | 40 +++++------ .../defog_broker_adv5_ansi.sql | 70 ++++++++++--------- .../defog_broker_adv5_sqlite.sql | 56 +++++++-------- 11 files changed, 192 insertions(+), 157 deletions(-) diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py index ba54206a9..d16fec832 100644 --- a/pydough/conversion/join_agg_transpose.py +++ b/pydough/conversion/join_agg_transpose.py @@ -3,6 +3,8 @@ __all__ = ["pull_joins_after_aggregates"] +from collections.abc import Iterable + import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, @@ -39,6 +41,21 @@ def visit_join(self, node: Join) -> RelationalNode: ) return super().visit_join(node) + def generate_name(self, base: str, used_names: Iterable[str]) -> str: + """ + Generates a new name for a column based on the base name and the existing + columns in the join. This is used to ensure that the new column names are + unique and do not conflict with existing names. 
+ """ + if base not in used_names: + return base + i = 0 + while True: + name = f"{base}_{i}" + if name not in used_names: + return name + i += 1 + def join_aggregate_transpose( self, join: Join, aggregate: Aggregate ) -> RelationalNode: @@ -84,6 +101,13 @@ def join_aggregate_transpose( ): return join + reverse_join_columns: dict[str, RelationalExpression] = {} + for join_col_name, join_col_expr in join.columns.items(): + assert isinstance(join_col_expr, ColumnReference) + reverse_join_columns[join_col_expr.name] = ColumnReference( + join_col_name, join_col_expr.data_type + ) + new_join_columns: dict[str, RelationalExpression] = {} new_key_columns: dict[str, RelationalExpression] = {} new_aggregate_columns: dict[str, CallExpression] = {} @@ -113,22 +137,31 @@ def join_aggregate_transpose( new_join_columns[key_name] = add_input_name( key_expr, join.default_input_aliases[0] ) - if key_name in used_column_names: - assert False - new_key_columns[key_name] = ColumnReference(key_name, col_expr.data_type) - used_column_names.add(key_name) + agg_key_name: str = self.generate_name(key_name, used_column_names) + new_key_columns[agg_key_name] = ColumnReference( + key_name, col_expr.data_type + ) + used_column_names.add(agg_key_name) for agg_name, agg_expr in aggregate.aggregations.items(): + new_inputs: list[RelationalExpression] = [] for input_expr in agg_expr.inputs: - if not isinstance(input_expr, ColumnReference): - assert False - if input_expr.name in new_join_columns: - assert False - new_join_columns[input_expr.name] = add_input_name( + join_name: str + if isinstance(input_expr, ColumnReference): + join_name = self.generate_name(input_expr.name, new_join_columns) + else: + join_name = self.generate_name("expr", new_join_columns) + new_join_columns[join_name] = add_input_name( input_expr, join.default_input_aliases[0] ) - if agg_name in used_column_names: - assert False + new_inputs.append(ColumnReference(join_name, input_expr.data_type)) + agg_name = 
self.generate_name(agg_name, used_column_names) + if new_inputs != agg_expr.inputs: + agg_expr = CallExpression( + agg_expr.op, + agg_expr.data_type, + new_inputs, + ) new_aggregate_columns[agg_name] = agg_expr used_column_names.add(agg_name) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 692cbf948..924352e52 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1496,6 +1496,8 @@ def optimize_relational_tree( simplify_expressions(root, additional_shuttles) root = confirm_root(push_filters(root)) root = confirm_root(pull_joins_after_aggregates(root)) + print() + print(root.to_tree_string()) root = ColumnPruner().prune_unused_columns(root) # Step 9: re-run projection merging, without pushing into joins. This diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 1d8f376f1..ebea7f70b 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'ps_partkey': anything_ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'p_name': ANYTHING(p_name), 'sum_revenue': SUM(expr)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_ps_partkey': t0.ps_partkey, 'expr': 
t0.l_extendedprice * 1:numeric - t0.l_discount * 1:numeric - t0.l_tax - t0.l_quantity * t0.ps_supplycost, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index df3d64e66..0b44cb5a4 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', 
ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) + AGGREGATE(keys={'ps_partkey': anything_ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(expr)}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_ps_partkey': t0.ps_partkey, 'expr': t0.l_extendedprice * 1:numeric - t0.l_discount * 1:numeric - t0.l_tax - t0.l_quantity * t0.ps_supplycost, 'l_quantity': t0.l_quantity, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_reve 
SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 1ce81c590..b8b18388e 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) +ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', anything_n_rows)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) + AGGREGATE(keys={'anything_n_nationkey': anything_n_nationkey, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'anything_region_name': 
ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) + JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_nationkey': t0.n_nationkey, 'n_name': t0.n_name, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,13 +10,13 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': 
n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 4d9bd41a7..ea9c3f303 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -3,26 +3,26 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTic JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares_1, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': ANYTHING(cust_avg_shares)}) + AGGREGATE(keys={'sbTxCustId_0': sbTxCustId_0}, aggregations={'cust_avg_shares': ANYTHING(cust_avg_shares), 'cust_tick_avg_shares': ANYTHING(cust_tick_avg_shares), 'sbTxCustId': ANYTHING(sbTxCustId), 'sbTxTickerId': ANYTHING(sbTxTickerId)}) 
+ JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares_1, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'customer_id_9': ANYTHING(customer_id_9), 'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': 
sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares_1': ANYTHING(cust_tick_avg_shares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares_1': ANYTHING(cust_tick_avg_shares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + 
SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'sbTxShares': ANYTHING(sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'cus_tick_typ_avg_shares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 9b5f5003a..3e4a0f4a7 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxId': t1.sbTxId}) - 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) + AGGREGATE(keys={'sbTxCustId_0': sbTxCustId_0}, aggregations={'cust_max_shares': MAX(sbTxShares), 'cust_ticker_max_shares': ANYTHING(cust_ticker_max_shares), 'sbTxCustId': ANYTHING(sbTxCustId), 'sbTxTickerId': ANYTHING(sbTxTickerId)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t0.sbTxShares, 'sbTxTickerId': t1.sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 3da2dfe6f..7bf3bb930 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -4,43 +4,43 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) FILTER(condition=sum_n_cust_type_trans > 1:numeric, 
columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxType_0': sbTxType_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t0.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': 
sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxType_0': sbTxType_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t0.sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId_0': sbTxTickerId_0}, aggregations={'sbTxTickerId': ANYTHING(sbTxTickerId), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'n_ticker_type_trans': ANYTHING(n_ticker_type_trans), 'sbTxType': ANYTHING(sbTxType)}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t1.sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index 1463bac6f..eaf34eca7 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ 
b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -17,21 +17,6 @@ WITH _t2 AS ( FROM tpch.region WHERE NOT r_name IN ('MIDDLE EAST', 'AFRICA', 'ASIA') -), _s12 AS ( - SELECT - MAX(nation.n_name) AS anything_n_name, - MAX(LOWER(_t3.r_name)) AS anything_region_name, - COUNT(*) AS n_rows, - nation.n_nationkey - FROM tpch.nation AS nation - JOIN _s1 AS _s1 - ON _s1.c_nationkey = nation.n_nationkey - JOIN _t3 AS _t3 - ON _t3.r_regionkey = nation.n_regionkey - JOIN _t2 AS _s5 - ON _s1.avg_cust_acctbal < _s5.c_acctbal AND _s5.c_nationkey = nation.n_nationkey - GROUP BY - nation.n_nationkey ), _t5 AS ( SELECT s_acctbal, @@ -59,13 +44,22 @@ WITH _t2 AS ( nation.n_nationkey ) SELECT - _s12.anything_region_name AS region_name, - _s12.anything_n_name AS nation_name, - _s12.n_rows AS n_above_avg_customers, - _s13.n_rows AS n_above_avg_suppliers -FROM _s12 AS _s12 + MAX(LOWER(_t3.r_name)) AS region_name, + MAX(nation.n_name) AS nation_name, + COUNT(*) AS n_above_avg_customers, + MAX(_s13.n_rows) AS n_above_avg_suppliers +FROM tpch.nation AS nation +JOIN _s1 AS _s1 + ON _s1.c_nationkey = nation.n_nationkey +JOIN _t3 AS _t3 + ON _t3.r_regionkey = nation.n_regionkey +JOIN _t2 AS _s5 + ON _s1.avg_cust_acctbal < _s5.c_acctbal AND _s5.c_nationkey = nation.n_nationkey JOIN _s13 AS _s13 - ON _s12.n_nationkey = _s13.n_nationkey + ON _s13.n_nationkey = anything_n_nationkey +GROUP BY + nation.n_nationkey, + n_nationkey ORDER BY - _s12.anything_region_name, - _s12.anything_n_name + MAX(LOWER(_t3.r_name)), + MAX(nation.n_name) diff --git a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index 48ac2f401..b05541fdc 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -1,49 +1,55 @@ -WITH _s0 AS ( +WITH _t1 AS ( SELECT - COUNT(sbdpclose) AS count_sbdpclose, - MAX(sbdphigh) AS max_high, - MIN(sbdplow) AS min_low, + COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, + 
MAX(sbdailyprice.sbdphigh) AS max_high, + MIN(sbdailyprice.sbdplow) AS min_low, + ANY_VALUE(sbticker.sbtickersymbol) AS sbtickersymbol, + SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose, CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), + EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))), ( - 2 * -1 - )) + WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), 1, 2) + ELSE SUBSTRING( + CONCAT('00', EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))), + ( + 2 * -1 + ) + ) END - ) AS month, - SUM(sbdpclose) AS sum_sbdpclose, - sbdptickerid - FROM main.sbdailyprice + ) AS month + FROM main.sbdailyprice AS sbdailyprice + JOIN main.sbticker AS sbticker + ON sbdailyprice.sbdptickerid = sbticker.sbtickerid GROUP BY CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), + EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 1, 2) - ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))), ( - 2 * -1 - )) + WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), 1, 2) + ELSE SUBSTRING( + CONCAT('00', EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))), + ( + 2 * -1 + ) + ) END ), - sbdptickerid + sbdailyprice.sbdptickerid ), _t0 AS ( SELECT - MAX(_s0.max_high) AS max_high, - MIN(_s0.min_low) AS min_low, - SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, - SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, - _s0.month, - sbticker.sbtickersymbol - FROM _s0 AS 
_s0 - JOIN main.sbticker AS sbticker - ON _s0.sbdptickerid = sbticker.sbtickerid + MAX(max_high) AS max_high, + MIN(min_low) AS min_low, + SUM(count_sbdpclose) AS sum_count_sbdpclose, + SUM(sum_sbdpclose) AS sum_sum_sbdpclose, + month, + sbtickersymbol + FROM _t1 GROUP BY - _s0.month, - sbticker.sbtickersymbol + month, + sbtickersymbol ) SELECT sbtickersymbol AS symbol, diff --git a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index 8a07d126c..9acd0eb0c 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -1,49 +1,49 @@ -WITH _s0 AS ( +WITH _t1 AS ( SELECT - COUNT(sbdpclose) AS count_sbdpclose, - MAX(sbdphigh) AS max_high, - MIN(sbdplow) AS min_low, + COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, + MAX(sbdailyprice.sbdphigh) AS max_high, + MIN(sbdailyprice.sbdplow) AS min_low, + MAX(sbticker.sbtickersymbol) AS sbtickersymbol, + SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose, CONCAT_WS( '-', - CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), + CAST(STRFTIME('%Y', sbdailyprice.sbdpdate) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', sbdpdate) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', sbdpdate) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdpdate) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), ( 2 * -1 )) END - ) AS month, - SUM(sbdpclose) AS sum_sbdpclose, - sbdptickerid - FROM main.sbdailyprice + ) AS month + FROM main.sbdailyprice AS sbdailyprice + JOIN main.sbticker AS sbticker + ON sbdailyprice.sbdptickerid = sbticker.sbtickerid GROUP BY CONCAT_WS( '-', - CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), + CAST(STRFTIME('%Y', sbdailyprice.sbdpdate) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', 
sbdpdate) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', sbdpdate) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdpdate) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), ( 2 * -1 )) END ), - sbdptickerid + sbdailyprice.sbdptickerid ), _t0 AS ( SELECT - MAX(_s0.max_high) AS max_high, - MIN(_s0.min_low) AS min_low, - SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, - SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, - _s0.month, - sbticker.sbtickersymbol - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbdptickerid = sbticker.sbtickerid + MAX(max_high) AS max_high, + MIN(min_low) AS min_low, + SUM(count_sbdpclose) AS sum_count_sbdpclose, + SUM(sum_sbdpclose) AS sum_sum_sbdpclose, + month, + sbtickersymbol + FROM _t1 GROUP BY - _s0.month, - sbticker.sbtickersymbol + month, + sbtickersymbol ) SELECT sbtickersymbol AS symbol, From a34ec879c6ae9d1dcfc6ff449f870e5f45ff975f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 25 Aug 2025 14:05:11 -0400 Subject: [PATCH 87/97] Adding reverse cardinality support --- pydough/conversion/hybrid_connection.py | 10 +++- pydough/conversion/hybrid_decorrelater.py | 8 +++ pydough/conversion/hybrid_tree.py | 53 ++++++++++++++++++- pydough/conversion/relational_converter.py | 30 +++++++++++ pydough/metadata/parse.py | 2 + .../reversible_property_metadata.py | 5 ++ pydough/relational/relational_nodes/join.py | 26 ++++++++- .../access_partition_child_after_filter.txt | 2 +- .../access_partition_child_backref_calc.txt | 2 +- ..._partition_child_filter_backref_filter.txt | 2 +- tests/test_plan_refsols/agg_max_ranking.txt | 2 +- .../agg_orders_by_year_month_just_europe.txt | 8 +-- .../agg_orders_by_year_month_vs_europe.txt | 8 +-- .../agg_parts_by_type_backref_global.txt | 2 +- 
tests/test_plan_refsols/aggregate_anti.txt | 4 +- .../aggregate_mixed_levels_simple.txt | 4 +- .../aggregate_on_function_call.txt | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 4 +- .../aggregate_then_backref.txt | 4 +- .../aggregation_analytics_1.txt | 12 ++--- .../aggregation_analytics_2.txt | 8 +-- .../aggregation_analytics_3.txt | 8 +-- tests/test_plan_refsols/anti_aggregate.txt | 4 +- .../anti_aggregate_alternate.txt | 4 +- tests/test_plan_refsols/anti_singular.txt | 2 +- tests/test_plan_refsols/asian_nations.txt | 2 +- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 4 +- .../avg_order_diff_per_customer.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_5.txt | 4 +- tests/test_plan_refsols/common_prefix_a.txt | 4 +- tests/test_plan_refsols/common_prefix_aa.txt | 4 +- tests/test_plan_refsols/common_prefix_ab.txt | 4 +- tests/test_plan_refsols/common_prefix_ac.txt | 2 +- tests/test_plan_refsols/common_prefix_ad.txt | 8 +-- tests/test_plan_refsols/common_prefix_ae.txt | 8 +-- tests/test_plan_refsols/common_prefix_af.txt | 8 +-- tests/test_plan_refsols/common_prefix_ag.txt | 28 +++++----- tests/test_plan_refsols/common_prefix_ah.txt | 22 ++++---- tests/test_plan_refsols/common_prefix_ai.txt | 20 +++---- tests/test_plan_refsols/common_prefix_aj.txt | 28 +++++----- tests/test_plan_refsols/common_prefix_ak.txt | 26 ++++----- tests/test_plan_refsols/common_prefix_al.txt | 14 ++--- tests/test_plan_refsols/common_prefix_am.txt | 10 ++-- tests/test_plan_refsols/common_prefix_an.txt | 12 ++--- tests/test_plan_refsols/common_prefix_ao.txt | 12 ++--- tests/test_plan_refsols/common_prefix_ap.txt | 6 +-- tests/test_plan_refsols/common_prefix_aq.txt | 8 +-- tests/test_plan_refsols/common_prefix_b.txt | 6 +-- tests/test_plan_refsols/common_prefix_c.txt | 10 ++-- 
tests/test_plan_refsols/common_prefix_d.txt | 12 ++--- tests/test_plan_refsols/common_prefix_e.txt | 4 +- tests/test_plan_refsols/common_prefix_f.txt | 6 +-- tests/test_plan_refsols/common_prefix_g.txt | 6 +-- tests/test_plan_refsols/common_prefix_h.txt | 10 ++-- tests/test_plan_refsols/common_prefix_i.txt | 4 +- tests/test_plan_refsols/common_prefix_j.txt | 4 +- tests/test_plan_refsols/common_prefix_k.txt | 4 +- tests/test_plan_refsols/common_prefix_l.txt | 10 ++-- tests/test_plan_refsols/common_prefix_m.txt | 10 ++-- tests/test_plan_refsols/common_prefix_n.txt | 14 ++--- tests/test_plan_refsols/common_prefix_o.txt | 14 ++--- tests/test_plan_refsols/common_prefix_p.txt | 6 +-- tests/test_plan_refsols/common_prefix_q.txt | 8 +-- tests/test_plan_refsols/common_prefix_r.txt | 8 +-- tests/test_plan_refsols/common_prefix_s.txt | 6 +-- tests/test_plan_refsols/common_prefix_t.txt | 6 +-- tests/test_plan_refsols/common_prefix_u.txt | 6 +-- tests/test_plan_refsols/common_prefix_v.txt | 4 +- tests/test_plan_refsols/common_prefix_w.txt | 4 +- tests/test_plan_refsols/common_prefix_x.txt | 4 +- tests/test_plan_refsols/common_prefix_y.txt | 4 +- tests/test_plan_refsols/common_prefix_z.txt | 4 +- tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_10.txt | 2 +- tests/test_plan_refsols/correl_11.txt | 2 +- tests/test_plan_refsols/correl_12.txt | 4 +- tests/test_plan_refsols/correl_13.txt | 4 +- tests/test_plan_refsols/correl_14.txt | 8 +-- tests/test_plan_refsols/correl_15.txt | 10 ++-- tests/test_plan_refsols/correl_16.txt | 6 +-- tests/test_plan_refsols/correl_17.txt | 2 +- tests/test_plan_refsols/correl_18.txt | 2 +- tests/test_plan_refsols/correl_19.txt | 4 +- tests/test_plan_refsols/correl_2.txt | 4 +- tests/test_plan_refsols/correl_20.txt | 10 ++-- tests/test_plan_refsols/correl_21.txt | 2 +- tests/test_plan_refsols/correl_22.txt | 2 +- tests/test_plan_refsols/correl_23.txt | 2 +- tests/test_plan_refsols/correl_24.txt | 2 +- 
tests/test_plan_refsols/correl_25.txt | 14 ++--- tests/test_plan_refsols/correl_26.txt | 12 ++--- tests/test_plan_refsols/correl_27.txt | 12 ++--- tests/test_plan_refsols/correl_28.txt | 12 ++--- tests/test_plan_refsols/correl_29.txt | 12 ++--- tests/test_plan_refsols/correl_3.txt | 6 +-- tests/test_plan_refsols/correl_30.txt | 14 ++--- tests/test_plan_refsols/correl_31.txt | 10 ++-- tests/test_plan_refsols/correl_32.txt | 6 +-- tests/test_plan_refsols/correl_33.txt | 2 +- tests/test_plan_refsols/correl_34.txt | 14 ++--- tests/test_plan_refsols/correl_35.txt | 14 ++--- tests/test_plan_refsols/correl_36.txt | 22 ++++---- tests/test_plan_refsols/correl_4.txt | 6 +-- tests/test_plan_refsols/correl_5.txt | 6 +-- tests/test_plan_refsols/correl_6.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 2 +- tests/test_plan_refsols/correl_8.txt | 2 +- tests/test_plan_refsols/correl_9.txt | 2 +- ...count_at_most_100_suppliers_per_nation.txt | 2 +- .../count_cust_supplier_nation_combos.txt | 12 ++--- ...multiple_subcollections_alongside_aggs.txt | 4 +- .../count_single_subcollection.txt | 2 +- .../cumulative_stock_analysis.txt | 2 +- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 2 +- .../customers_sum_line_price.txt | 4 +- .../test_plan_refsols/deep_best_analysis.txt | 20 +++---- tests/test_plan_refsols/double_cross.txt | 6 +-- tests/test_plan_refsols/dumb_aggregation.txt | 2 +- .../epoch_culture_events_info.txt | 10 ++-- .../epoch_event_gap_per_era.txt | 2 +- .../epoch_events_per_season.txt | 2 +- .../epoch_first_event_per_era.txt | 2 +- .../epoch_intra_season_searches.txt | 18 +++---- ...och_most_popular_search_engine_per_tod.txt | 2 +- .../epoch_most_popular_topic_per_region.txt | 4 +- .../epoch_num_predawn_cold_war.txt | 6 +-- ...ping_event_search_other_users_per_user.txt | 8 +-- ...ch_overlapping_event_searches_per_user.txt | 8 +-- .../epoch_pct_searches_per_tod.txt | 2 +- .../epoch_search_results_by_tod.txt | 2 +- 
.../epoch_summer_events_per_type.txt | 2 +- .../epoch_unique_users_per_engine.txt | 4 +- .../epoch_users_most_cold_war_searches.txt | 6 +-- .../first_order_per_customer.txt | 2 +- tests/test_plan_refsols/function_sampler.txt | 4 +- .../global_aggfunc_backref.txt | 2 +- .../global_aggfuncs_multiple_children.txt | 4 +- tests/test_plan_refsols/hour_minute_day.txt | 2 +- .../join_asia_region_nations.txt | 2 +- tests/test_plan_refsols/join_order_by.txt | 2 +- .../join_order_by_back_reference.txt | 2 +- .../join_order_by_pruned_back_reference.txt | 2 +- .../test_plan_refsols/join_region_nations.txt | 2 +- .../join_region_nations_customers.txt | 4 +- tests/test_plan_refsols/join_topk.txt | 2 +- .../lineitem_regional_shipments.txt | 16 +++--- .../lineitem_regional_shipments2.txt | 16 +++--- .../lineitem_regional_shipments3.txt | 16 +++--- ...lineitems_access_cust_supplier_nations.txt | 12 ++--- .../lines_german_supplier_economy_part.txt | 10 ++-- .../lines_shipping_vs_customer_region.txt | 16 +++--- .../month_year_sliding_windows.txt | 2 +- .../mostly_positive_accounts_per_nation1.txt | 4 +- .../mostly_positive_accounts_per_nation2.txt | 4 +- .../mostly_positive_accounts_per_nation3.txt | 4 +- .../multi_partition_access_2.txt | 20 +++---- .../multi_partition_access_3.txt | 14 ++--- .../multi_partition_access_4.txt | 4 +- .../multi_partition_access_5.txt | 8 +-- .../multi_partition_access_6.txt | 28 +++++----- .../test_plan_refsols/multiple_has_hasnot.txt | 18 +++---- ...ple_simple_aggregations_multiple_calcs.txt | 4 +- ...ltiple_simple_aggregations_single_calc.txt | 4 +- .../nation_acctbal_breakdown.txt | 4 +- tests/test_plan_refsols/nation_best_order.txt | 6 +-- .../nation_name_contains_region_name.txt | 2 +- .../nations_access_region.txt | 2 +- .../nations_order_by_num_suppliers.txt | 2 +- .../nations_region_order_by_name.txt | 2 +- .../nations_sum_line_price.txt | 6 +-- .../num_positive_accounts_per_nation.txt | 4 +- .../odate_and_rdate_avggap.txt | 2 +- 
.../order_by_before_join.txt | 2 +- .../ordered_asian_nations.txt | 2 +- .../orders_sum_line_price.txt | 2 +- .../orders_sum_vs_count_line_price.txt | 2 +- .../orders_versus_first_orders.txt | 6 +-- tests/test_plan_refsols/pagerank_a1.txt | 6 +-- tests/test_plan_refsols/pagerank_a2.txt | 10 ++-- tests/test_plan_refsols/pagerank_a6.txt | 26 ++++----- tests/test_plan_refsols/pagerank_b3.txt | 14 ++--- tests/test_plan_refsols/pagerank_c4.txt | 18 +++---- tests/test_plan_refsols/pagerank_d5.txt | 22 ++++---- tests/test_plan_refsols/pagerank_h8.txt | 34 ++++++------ tests/test_plan_refsols/part_cross_part_a.txt | 10 ++-- tests/test_plan_refsols/part_cross_part_b.txt | 10 ++-- tests/test_plan_refsols/part_cross_part_c.txt | 10 ++-- tests/test_plan_refsols/part_reduced_size.txt | 2 +- .../parts_quantity_increase_95_96.txt | 8 +-- .../percentile_customers_per_region.txt | 4 +- .../quantile_function_test_1.txt | 2 +- .../quantile_function_test_2.txt | 6 +-- .../quantile_function_test_3.txt | 6 +-- .../quantile_function_test_4.txt | 6 +-- .../rank_customers_per_nation.txt | 2 +- .../rank_customers_per_region.txt | 4 +- .../rank_nations_by_region.txt | 2 +- .../rank_nations_per_region_by_customers.txt | 4 +- ...rank_parts_per_supplier_region_by_size.txt | 8 +-- .../test_plan_refsols/rank_with_filters_c.txt | 2 +- .../region_acctbal_breakdown.txt | 4 +- .../region_nation_window_aggs.txt | 2 +- .../region_nations_backref.txt | 2 +- .../region_orders_from_nations_richest.txt | 6 +-- .../regional_first_order_best_line_part.txt | 10 ++-- .../regional_suppliers_percentile.txt | 6 +-- .../regions_sum_line_price.txt | 8 +-- tests/test_plan_refsols/replace_order_by.txt | 2 +- .../richest_customer_per_region.txt | 4 +- tests/test_plan_refsols/semi_aggregate.txt | 4 +- tests/test_plan_refsols/semi_singular.txt | 2 +- tests/test_plan_refsols/simple_anti_1.txt | 2 +- tests/test_plan_refsols/simple_anti_2.txt | 4 +- tests/test_plan_refsols/simple_cross_1.txt | 2 +- 
tests/test_plan_refsols/simple_cross_10.txt | 6 +-- tests/test_plan_refsols/simple_cross_11.txt | 2 +- tests/test_plan_refsols/simple_cross_12.txt | 2 +- tests/test_plan_refsols/simple_cross_2.txt | 2 +- tests/test_plan_refsols/simple_cross_3.txt | 14 ++--- tests/test_plan_refsols/simple_cross_4.txt | 4 +- tests/test_plan_refsols/simple_cross_5.txt | 8 +-- tests/test_plan_refsols/simple_cross_6.txt | 2 +- tests/test_plan_refsols/simple_cross_7.txt | 4 +- tests/test_plan_refsols/simple_cross_8.txt | 16 +++--- tests/test_plan_refsols/simple_cross_9.txt | 6 +-- tests/test_plan_refsols/simple_semi_1.txt | 2 +- tests/test_plan_refsols/simple_semi_2.txt | 4 +- tests/test_plan_refsols/simple_var_std.txt | 2 +- tests/test_plan_refsols/singular1.txt | 2 +- tests/test_plan_refsols/singular2.txt | 4 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 2 +- tests/test_plan_refsols/singular5.txt | 4 +- tests/test_plan_refsols/singular6.txt | 8 +-- tests/test_plan_refsols/singular7.txt | 6 +-- tests/test_plan_refsols/singular_anti.txt | 2 +- tests/test_plan_refsols/singular_semi.txt | 2 +- tests/test_plan_refsols/smoke_d.txt | 4 +- .../sqlite_udf_combine_strings.txt | 4 +- .../sqlite_udf_count_epsilon.txt | 4 +- .../sqlite_udf_covar_pop.txt | 6 +-- tests/test_plan_refsols/sqlite_udf_nested.txt | 2 +- tests/test_plan_refsols/sqlite_udf_nval.txt | 2 +- .../sqlite_udf_percent_positive.txt | 8 +-- .../test_plan_refsols/supplier_best_part.txt | 8 +-- .../supplier_pct_national_qty.txt | 8 +-- .../test_plan_refsols/suppliers_bal_diffs.txt | 4 +- ...ograph_battery_failure_rates_anomalies.txt | 8 +-- .../technograph_country_cartesian_oddball.txt | 2 +- ...chnograph_country_combination_analysis.txt | 10 ++-- ...nograph_country_incident_rate_analysis.txt | 14 ++--- ...aph_error_percentages_sun_set_by_error.txt | 6 +-- ..._error_rate_sun_set_by_factory_country.txt | 6 +-- .../technograph_global_incident_rate.txt | 2 +- 
.../technograph_hot_purchase_window.txt | 4 +- ...hnograph_incident_rate_by_release_year.txt | 8 +-- .../technograph_incident_rate_per_brand.txt | 4 +- .../technograph_monthly_incident_rate.txt | 16 +++--- .../technograph_most_unreliable_products.txt | 4 +- ...umulative_incident_rate_goldcopperstar.txt | 16 +++--- ..._year_cumulative_incident_rate_overall.txt | 8 +-- ...top_5_nations_balance_by_num_suppliers.txt | 2 +- .../top_5_nations_by_num_supplierss.txt | 2 +- .../top_customers_by_orders.txt | 2 +- tests/test_plan_refsols/tpch_q10.txt | 6 +-- tests/test_plan_refsols/tpch_q11.txt | 10 ++-- tests/test_plan_refsols/tpch_q12.txt | 2 +- tests/test_plan_refsols/tpch_q13.txt | 2 +- tests/test_plan_refsols/tpch_q14.txt | 2 +- tests/test_plan_refsols/tpch_q15.txt | 6 +-- tests/test_plan_refsols/tpch_q16.txt | 4 +- tests/test_plan_refsols/tpch_q17.txt | 2 +- tests/test_plan_refsols/tpch_q18.txt | 4 +- tests/test_plan_refsols/tpch_q19.txt | 2 +- tests/test_plan_refsols/tpch_q2.txt | 8 +-- tests/test_plan_refsols/tpch_q20.txt | 8 +-- tests/test_plan_refsols/tpch_q21.txt | 14 ++--- tests/test_plan_refsols/tpch_q22.txt | 4 +- tests/test_plan_refsols/tpch_q3.txt | 4 +- tests/test_plan_refsols/tpch_q4.txt | 2 +- tests/test_plan_refsols/tpch_q5.txt | 12 ++--- tests/test_plan_refsols/tpch_q7.txt | 10 ++-- tests/test_plan_refsols/tpch_q8.txt | 14 ++--- tests/test_plan_refsols/tpch_q9.txt | 10 ++-- tests/test_plan_refsols/triple_partition.txt | 16 +++--- .../various_aggfuncs_simple.txt | 2 +- .../window_filter_order_1.txt | 4 +- .../window_filter_order_10.txt | 2 +- .../window_filter_order_2.txt | 4 +- .../window_filter_order_3.txt | 4 +- .../window_filter_order_4.txt | 4 +- .../window_filter_order_5.txt | 2 +- .../window_filter_order_6.txt | 2 +- .../window_filter_order_7.txt | 2 +- .../window_filter_order_8.txt | 4 +- .../window_filter_order_9.txt | 4 +- .../window_sliding_frame_relsize.txt | 2 +- .../window_sliding_frame_relsum.txt | 2 +- .../year_month_nation_orders.txt | 6 
+-- 304 files changed, 1074 insertions(+), 948 deletions(-) diff --git a/pydough/conversion/hybrid_connection.py b/pydough/conversion/hybrid_connection.py index e9385c5ef..a39a147b9 100644 --- a/pydough/conversion/hybrid_connection.py +++ b/pydough/conversion/hybrid_connection.py @@ -10,7 +10,7 @@ from enum import Enum from typing import TYPE_CHECKING -from pydough.relational import JoinType +from pydough.relational import JoinCardinality, JoinType from .hybrid_expressions import ( HybridFunctionExpr, @@ -313,6 +313,8 @@ class HybridConnection: child can be defined at (exclusive). - `aggs`: a mapping of aggregation calls made onto expressions relative to the context of `subtree`. + - `reverse_cardinality`: the JoinCardinality of the connection from the + perspective of the child subtree back to the parent tree. """ parent: "HybridTree" @@ -349,6 +351,12 @@ class HybridConnection: expressions defined relative to the child subtree. """ + reverse_cardinality: JoinCardinality + """ + The JoinCardinality of the connection from the perspective of the child + subtree back to the parent tree. 
+ """ + always_exists: bool | None = None """ Whether the connection is guaranteed to have at least one matching diff --git a/pydough/conversion/hybrid_decorrelater.py b/pydough/conversion/hybrid_decorrelater.py index 628d37e4d..fe48c2379 100644 --- a/pydough/conversion/hybrid_decorrelater.py +++ b/pydough/conversion/hybrid_decorrelater.py @@ -9,6 +9,7 @@ import copy import pydough.pydough_operators as pydop +from pydough.relational import JoinCardinality from pydough.types import BooleanType from .hybrid_connection import ConnectionType, HybridConnection @@ -427,6 +428,13 @@ def decorrelate_child( ) if child.connection_type.is_aggregation or is_faux_agg: child.subtree.agg_keys = new_agg_keys + + # Mark the reverse cardinality as SINGULAR_ACCESS since each record of + # the de-correlated child can only match with one record of the + # original parent due to the join keys being based on the uniqueness + # keys of the original parent. + child.reverse_cardinality = JoinCardinality.SINGULAR_ACCESS + # If the child is such that we don't need to keep rows from the parent # without a match, replace the parent & its ancestors with a # HybridPullUp node (and replace any other deleted nodes with no-ops). diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index a9af8d1ef..9567d30f9 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -18,11 +18,13 @@ from pydough.metadata import ( SubcollectionRelationshipMetadata, ) +from pydough.metadata.properties import ReversiblePropertyMetadata from pydough.qdag import ( Literal, SubCollection, TableCollection, ) +from pydough.relational import JoinCardinality from pydough.types import BooleanType, NumericType from .hybrid_connection import ConnectionType, HybridConnection @@ -571,13 +573,27 @@ def add_child( # Return the index of the existing child. return idx + # Infer the cardinality of the join from the perspective of the new + # collection to the existing data. 
+ reverse_cardinality: JoinCardinality = child.infer_root_reverse_cardinality() + # Create and insert the new child connection. new_child_idx = len(self.children) connection: HybridConnection = HybridConnection( - self, child, connection_type, min_steps, max_steps, {} + self, + child, + connection_type, + min_steps, + max_steps, + {}, + reverse_cardinality, ) self._children.append(connection) + # Augment the reverse cardinality if the parent does not always exist. + if (not reverse_cardinality.filters) and (not self.always_exists()): + connection.reverse_cardinality = reverse_cardinality.add_filter() + # If an operation prevents the child's presence from directly # filtering the current level, update its connection type to be either # SINGULAR or AGGREGATION, then insert a similar COUNT(*)/PRESENT @@ -597,6 +613,41 @@ def add_child( # Return the index of the newly created child. return new_child_idx + def infer_root_reverse_cardinality(self) -> JoinCardinality: + """ + TODO + """ + if self.parent is None: + match self.pipeline[0]: + case HybridRoot(): + return JoinCardinality.PLURAL_ACCESS + case HybridCollectionAccess(): + cardinality: JoinCardinality = JoinCardinality.PLURAL_ACCESS + if isinstance(self.pipeline[0].collection, SubCollection): + metadata = self.pipeline[0].collection.subcollection_property + if ( + isinstance(metadata, ReversiblePropertyMetadata) + and metadata.reverse is not None + ): + if metadata.reverse.is_plural: + cardinality = JoinCardinality.PLURAL_ACCESS + else: + cardinality = JoinCardinality.SINGULAR_ACCESS + if not metadata.reverse.always_matches: + cardinality = cardinality.add_filter() + return JoinCardinality.PLURAL_ACCESS + return cardinality + case HybridPartition(): + return self.children[0].subtree.infer_root_reverse_cardinality() + case HybridPartitionChild(): + return self.pipeline[0].subtree.infer_root_reverse_cardinality() + case _: + raise NotImplementedError( + f"Invalid start of pipeline: 
{self.pipeline[0].__class__.__name__}" + ) + else: + return self.parent.infer_root_reverse_cardinality() + def add_successor(self, successor: "HybridTree") -> None: """ Marks two hybrid trees in a predecessor-successor relationship. diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index df8a5284a..a751bfba4 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -18,6 +18,7 @@ SimpleJoinMetadata, SimpleTableMetadata, ) +from pydough.metadata.properties import ReversiblePropertyMetadata from pydough.qdag import ( Calculate, CollectionAccess, @@ -428,6 +429,7 @@ def join_outputs( rhs_result: TranslationOutput, join_type: JoinType, join_cardinality: JoinCardinality, + reverse_join_cardinality: JoinCardinality, join_keys: list[tuple[HybridExpr, HybridExpr]] | None, join_cond: HybridExpr | None, child_idx: int | None, @@ -444,6 +446,8 @@ def join_outputs( onto `rhs_result`. `join_cardinality`: the cardinality of the join to be used to connect `lhs_result` onto `rhs_result`. + `reverse_join_cardinality`: the cardinality of the join from the + perspective of `rhs_result`. `join_keys`: a list of tuples in the form `(lhs_key, rhs_key)` that represent the equi-join keys used for the join from either side. This can be None if the `join_cond` is provided instead. 
@@ -488,6 +492,7 @@ def join_outputs( join_type, join_columns, join_cardinality, + reverse_join_cardinality, correl_name=lhs_result.correlated_name, ) input_aliases: list[str | None] = out_rel.default_input_aliases @@ -700,6 +705,7 @@ def handle_children( child_output, child.connection_type.join_type, cardinality, + child.reverse_cardinality, join_keys, child.subtree.general_join_condition, child_idx, @@ -714,6 +720,7 @@ def handle_children( child_output, child.connection_type.join_type, JoinCardinality.SINGULAR_FILTER, + child.reverse_cardinality, join_keys, child.subtree.general_join_condition, child_idx, @@ -839,6 +846,26 @@ def translate_sub_collection( else cardinality.add_filter() ) + # Infer the cardinality of the join from the perspective of the new + # collection to the existing data. + reverse_cardinality: JoinCardinality + if ( + isinstance( + collection_access.subcollection_property, ReversiblePropertyMetadata + ) + and collection_access.subcollection_property.reverse is not None + ): + if collection_access.subcollection_property.reverse.is_plural: + reverse_cardinality = JoinCardinality.PLURAL_ACCESS + else: + reverse_cardinality = JoinCardinality.SINGULAR_ACCESS + if not collection_access.subcollection_property.reverse.always_matches: + reverse_cardinality = reverse_cardinality.add_filter() + else: + reverse_cardinality = JoinCardinality.PLURAL_ACCESS + if (not reverse_cardinality.filters) and (not parent.always_exists()): + reverse_cardinality = reverse_cardinality.add_filter() + join_keys: list[tuple[HybridExpr, HybridExpr]] | None = None join_cond: HybridExpr | None = None match collection_access.subcollection_property: @@ -868,6 +895,7 @@ def translate_sub_collection( rhs_output, JoinType.INNER, cardinality, + reverse_cardinality, join_keys, join_cond, None, @@ -1099,6 +1127,7 @@ def translate_partition_child( child_output, JoinType.INNER, JoinCardinality.PLURAL_FILTER, + JoinCardinality.SINGULAR_ACCESS, join_keys, None, None, @@ -1259,6 +1288,7 
@@ def rel_translation( result, JoinType.INNER, JoinCardinality.PLURAL_ACCESS, + JoinCardinality.SINGULAR_ACCESS, join_keys, None, None, diff --git a/pydough/metadata/parse.py b/pydough/metadata/parse.py index 41dce9ec4..3c14da6ed 100644 --- a/pydough/metadata/parse.py +++ b/pydough/metadata/parse.py @@ -311,6 +311,8 @@ def create_reverse_relationship( extra_semantic_info, ) ) + original_property.reverse = reverse_property + reverse_property.reverse = original_property reverse_collection.add_property(reverse_property) diff --git a/pydough/metadata/properties/reversible_property_metadata.py b/pydough/metadata/properties/reversible_property_metadata.py index 42b5983a2..abce1a327 100644 --- a/pydough/metadata/properties/reversible_property_metadata.py +++ b/pydough/metadata/properties/reversible_property_metadata.py @@ -17,6 +17,11 @@ class ReversiblePropertyMetadata(SubcollectionRelationshipMetadata): reverse relationship. """ + reverse: SubcollectionRelationshipMetadata | None = None + """ + The reverse property that goes from the child back to the parent. 
+ """ + @abstractmethod def build_reverse_relationship( self, diff --git a/pydough/relational/relational_nodes/join.py b/pydough/relational/relational_nodes/join.py index fc96c841c..6c6a7a34d 100644 --- a/pydough/relational/relational_nodes/join.py +++ b/pydough/relational/relational_nodes/join.py @@ -160,6 +160,7 @@ def __init__( join_type: JoinType, columns: dict[str, RelationalExpression], cardinality: JoinCardinality = JoinCardinality.UNKNOWN_UNKNOWN, + reverse_cardinality: JoinCardinality = JoinCardinality.UNKNOWN_UNKNOWN, correl_name: str | None = None, ) -> None: super().__init__(columns) @@ -171,6 +172,7 @@ def __init__( self._condition: RelationalExpression = condition self._join_type: JoinType = join_type self._cardinality: JoinCardinality = cardinality + self._reverse_cardinality: JoinCardinality = reverse_cardinality self._correl_name: str | None = correl_name @property @@ -212,7 +214,7 @@ def join_type(self, join_type: JoinType) -> None: @property def cardinality(self) -> JoinCardinality: """ - The type of the joins. + The cardinality of the join, from the perspective of the first input. """ return self._cardinality @@ -223,6 +225,20 @@ def cardinality(self, cardinality: JoinCardinality) -> None: """ self._cardinality = cardinality + @property + def reverse_cardinality(self) -> JoinCardinality: + """ + The cardinality of the join, from the perspective of the second input. + """ + return self._reverse_cardinality + + @reverse_cardinality.setter + def reverse_cardinality(self, cardinality: JoinCardinality) -> None: + """ + The setter for the reverse join cardinality. 
+ """ + self._reverse_cardinality = cardinality + @property def inputs(self) -> list[RelationalNode]: return self._inputs @@ -261,7 +277,12 @@ def to_string(self, compact: bool = False) -> str: if self.cardinality == JoinCardinality.UNKNOWN_UNKNOWN else f", cardinality={self.cardinality.name}" ) - return f"JOIN(condition={self.condition.to_string(compact)}, type={self.join_type.name}{cardinality_suffix}, columns={self.make_column_string(self.columns, compact)}{correl_suffix})" + reverse_cardinality_suffix: str = ( + "" + if self.reverse_cardinality == JoinCardinality.UNKNOWN_UNKNOWN + else f", reverse_cardinality={self.reverse_cardinality.name}" + ) + return f"JOIN(condition={self.condition.to_string(compact)}, type={self.join_type.name}{cardinality_suffix}{reverse_cardinality_suffix}, columns={self.make_column_string(self.columns, compact)}{correl_suffix})" def accept(self, visitor: "RelationalVisitor") -> None: visitor.visit_join(self) @@ -280,5 +301,6 @@ def node_copy( self.join_type, columns, self.cardinality, + self.reverse_cardinality, self.correl_name, ) diff --git a/tests/test_plan_refsols/access_partition_child_after_filter.txt b/tests/test_plan_refsols/access_partition_child_after_filter.txt index 24d37657a..76c2d9652 100644 --- a/tests/test_plan_refsols/access_partition_child_after_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_after_filter.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) FILTER(condition=avg_p_retailprice > 27.5:numeric, columns={'p_type': p_type}) AGGREGATE(keys={'p_type': 
p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/access_partition_child_backref_calc.txt b/tests/test_plan_refsols/access_partition_child_backref_calc.txt index 5b5f23b34..3296b8420 100644 --- a/tests/test_plan_refsols/access_partition_child_backref_calc.txt +++ b/tests/test_plan_refsols/access_partition_child_backref_calc.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price_versus_avg', p_retailprice - avg_price)], orderings=[]) - JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt index 799732da6..9527b7c1a 100644 --- a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + 
JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/agg_max_ranking.txt b/tests/test_plan_refsols/agg_max_ranking.txt index 129c232f4..d73080147 100644 --- a/tests/test_plan_refsols/agg_max_ranking.txt +++ b/tests/test_plan_refsols/agg_max_ranking.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('highest_rank', highest_rank)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'highest_rank': MAX(cust_rank)}) PROJECT(columns={'c_nationkey': c_nationkey, 'cust_rank': RANKING(args=[], partition=[], order=[(c_acctbal):desc_first], allow_ties=True)}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index 8690211bf..5a9021e4e 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -1,12 +1,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.month == t1.month & 
t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'year': t0.year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt 
b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 5325d91e9..1bc99c0d2 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -1,12 +1,12 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), ('total_orders', DEFAULT_TO(agg_1, 0:numeric))], orderings=[]) - JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.n_rows, 'month': t0.month, 'n_rows': t0.n_rows, 'year': t0.year}) + JOIN(condition=t0.month == t1.month & t0.year == t1.year, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'month': t0.month, 'n_rows': t0.n_rows, 'year': t0.year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt index b3c214fec..3a3ff6bd6 100644 --- a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt +++ b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_type', p_type), ('percentage_of_parts', n_rows / total_num_parts), ('avg_price', avg_p_retailprice)], orderings=[]) - JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) + JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice), 'total_num_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index 02458ffdc..d780bbcb5 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), 
('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 4efebacf2..81d20edcc 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,7 +1,7 @@ ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 
'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index 3e68d185b..d28ac7f15 100644 --- a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 8e8ffd889..3f4f8df2b 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', 
avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index 8fa56b648..c3fd73955 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == 
t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index ba946c021..58e0deb0c 100644 --- a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -1,16 +1,16 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=8:numeric) - JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 1d8f376f1..cb721fe2a 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,9 +1,9 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + 
JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index df3d64e66..8d3788bf3 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,9 +1,9 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index 02458ffdc..b5feeed30 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index 598407ce0..02ee0138c 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/anti_singular.txt 
b/tests/test_plan_refsols/anti_singular.txt index a66ce09a4..117716d5e 100644 --- a/tests/test_plan_refsols/anti_singular.txt +++ b/tests/test_plan_refsols/anti_singular.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/asian_nations.txt b/tests/test_plan_refsols/asian_nations.txt index ad3901411..dc211ee3e 100644 --- a/tests/test_plan_refsols/asian_nations.txt +++ b/tests/test_plan_refsols/asian_nations.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt 
b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index c08e7d33f..1dbe56459 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_expr_1 / sum_count_expr_1)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'count_expr_1': COUNT(LARGEST(c_acctbal, 0:numeric)), 'sum_expr_1': SUM(LARGEST(c_acctbal, 0:numeric))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index 5a91d29b9..e1703a4fb 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt +++ 
b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index 7c5488e03..2e05042d5 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 4432b9290..a0a95a823 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': 
c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 4432b9290..a0a95a823 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt 
b/tests/test_plan_refsols/bad_child_reuse_4.txt index 984b4aad1..da2b4c0c0 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index e58165f3d..3feedb723 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 
0:numeric))], orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index ae633b5bc..afee2ad7c 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aa.txt b/tests/test_plan_refsols/common_prefix_aa.txt index 71a7e20e6..6ae902b07 100644 --- a/tests/test_plan_refsols/common_prefix_aa.txt +++ b/tests/test_plan_refsols/common_prefix_aa.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': 
r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 77d4e5f1b..323768976 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ac.txt b/tests/test_plan_refsols/common_prefix_ac.txt index 6693cbeec..6e860bc35 100644 --- a/tests/test_plan_refsols/common_prefix_ac.txt +++ b/tests/test_plan_refsols/common_prefix_ac.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, 
columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index b4202ffb5..7803949a3 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ae.txt b/tests/test_plan_refsols/common_prefix_ae.txt index 1bc0d2b9f..32e124483 100644 --- a/tests/test_plan_refsols/common_prefix_ae.txt +++ b/tests/test_plan_refsols/common_prefix_ae.txt @@ -1,13 +1,13 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_customers), ('customer_name', customer_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'customer_name': t1.customer_name, 'n_customers': t1.n_customers, 'n_name': t0.n_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'customer_name': t1.customer_name, 'n_customers': t1.n_customers, 'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'customer_name': MAX(c_name), 'n_customers': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index c35fff43b..e2a896fca 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ 
b/tests/test_plan_refsols/common_prefix_af.txt @@ -1,14 +1,14 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_c_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_c_name': max_c_name, 'n_rows': n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 13620bfb8..a3e99e82a 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,19 +1,19 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -22,12 +22,12 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 
'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': 
t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -37,7 +37,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index bb3062a31..1e859202f 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,10 +1,10 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -13,12 +13,12 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -28,7 +28,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 722878bb4..4448ee711 100644 --- 
a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,21 +1,21 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': 
t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': 
n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -25,7 +25,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index 36c32255d..ca09a6735 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,19 +1,19 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', 
ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 
'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': 
t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -22,12 +22,12 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT(), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': 
t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -37,7 +37,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': 
ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 534ab16db..ab4e235dd 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,19 +1,19 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 
'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -22,11 +22,11 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -36,6 +36,6 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'TRUCK':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, 
columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index cd75a8794..f7b11e162 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,22 +1,22 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', n_rows_1)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], 
partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index a10ac09e8..1ed0ccd60 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,17 +1,17 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 
'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=sum_agg_3 > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_agg_3': SUM(agg_3)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_3': t1.agg_3, 'o_custkey': t0.o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_3': t1.agg_3, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 5b7ff7b75..396066d64 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,21 +1,21 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, 
columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) FILTER(condition=p_size < 15:numeric, columns={'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index b6b7a4abb..91bfe0dcf 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,26 +1,26 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_tax': l_tax}) FILTER(condition=p_size < 5:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': 
p_partkey, 'p_size': p_size}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 15bd3d015..7f58c39d0 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -1,10 +1,10 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quantity', ps_availqty), ('supplier_nation', n_name)], orderings=[(p_name):asc_first]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) + 
JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 9f93ea84e..944f37615 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,13 +1,13 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 
'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 4936b25c3..74c69984a 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT(), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 707a4ade2..26411f439 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_parts', n_parts)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_22': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 300cb9ba1..c29d975b2 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,14 +1,14 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(agg_29), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index da9a367f2..a6acb8f40 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index 1b6957518..f47fe1ec5 100644 --- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_agg_8)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT(), 'sum_agg_8': SUM(agg_8)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 1ba3a7526..3db1c35e4 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 5d1e6d7e0..97ab075cc 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_agg_22_1), ('n_suppliers', sum_sum_expr_18_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_agg_22_1': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 
'sum_sum_expr_18_1': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index b277cf6c4..fdb64f108 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, 
columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_j.txt b/tests/test_plan_refsols/common_prefix_j.txt index 5cd83f892..f6ba49d6c 100644 --- a/tests/test_plan_refsols/common_prefix_j.txt +++ b/tests/test_plan_refsols/common_prefix_j.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_k.txt b/tests/test_plan_refsols/common_prefix_k.txt index 41de2f7c4..8eaecd723 100644 --- a/tests/test_plan_refsols/common_prefix_k.txt +++ b/tests/test_plan_refsols/common_prefix_k.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_name', c_name), 
('region_name', r_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_name': t1.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 67dd911ab..166af0404 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,17 +1,17 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': 
t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 77d79aae2..1ec0ef2aa 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -1,15 +1,15 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': 
t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 
'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=CONTAINS(p_name, 'mint':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 3faf6e877..e7b6f51be 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,13 +1,13 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], 
orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, 
aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_11': t1.agg_11, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) @@ -16,8 +16,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), 
('n_elements', D FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 42ae08339..199fb1393 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,15 +1,15 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) 
FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_small_parts': sum_sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=sum_sum_agg_5 > 
0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) FILTER(condition=MONTH(l_shipdate) == 
11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) @@ -18,8 +18,8 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', D SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 47e4f9c62..17bd44462 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,12 +1,12 @@ 
ROOT(columns=[('name', c_name), ('n_orders', n_rows), ('n_parts_ordered', n_rows_1), ('n_distinct_parts', ndistinct_l_partkey)], orderings=[(ndistinct_l_partkey / n_rows_1):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'ndistinct_l_partkey': NDISTINCT(l_partkey)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index 14b2f6278..f08e39969 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,13 +1,13 @@ ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric)), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority 
== '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 8d8562577..f024a6127 100644 --- 
a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,15 +1,15 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_extendedprice': max_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_l_extendedprice': MAX(anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_n_rows': SUM(n_rows), 'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'anything_l_extendedprice': t1.anything_l_extendedprice, 'anything_p_name': t1.anything_p_name, 'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_extendedprice': t1.anything_l_extendedprice, 'anything_p_name': 
t1.anything_p_name, 'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 
'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index da50bc6ba..85f114b4e 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', n_rows), ('most_recent_order_distinct', ndistinct_l_suppkey)], orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 
'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 9095c85fb..4592b0af2 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': 
t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 62a207995..8bb6e9ff8 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,13 +1,13 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_v.txt b/tests/test_plan_refsols/common_prefix_v.txt index 3dc65ce4f..72dfc31e5 100644 --- a/tests/test_plan_refsols/common_prefix_v.txt +++ b/tests/test_plan_refsols/common_prefix_v.txt @@ -1,7 +1,7 @@ 
ROOT(columns=[('name', c_name), ('region_name', r_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'r_name': t1.r_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_w.txt b/tests/test_plan_refsols/common_prefix_w.txt index 2d7e3a5d0..b64add79d 100644 --- a/tests/test_plan_refsols/common_prefix_w.txt +++ b/tests/test_plan_refsols/common_prefix_w.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', o_orderkey), ('cust_nation_name', n_name)], orderings=[(o_orderkey):asc_first], limit=5:numeric) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - 
JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=SLICE(n_name, None:unknown, 1:numeric, None:unknown) == 'A':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index d738602e4..6283b3bc5 100644 --- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index df9fcfbf2..2bff6eeda 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 
23dd535b3..3b3b5fad6 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 4a09564e2..71b6a0111 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index b94e2bf45..fcb05e48c 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', None:unknown)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_11.txt b/tests/test_plan_refsols/correl_11.txt index 7c55e38f7..b2cdc198b 100644 --- a/tests/test_plan_refsols/correl_11.txt +++ b/tests/test_plan_refsols/correl_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t0.p_brand == t1.p_brand & t1.p_retailprice > 1.4:numeric * t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, columns={'p_brand': t0.p_brand}) + JOIN(condition=t0.p_brand == t1.p_brand & t1.p_retailprice > 1.4:numeric * 
t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) AGGREGATE(keys={'p_brand': p_brand}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_12.txt b/tests/test_plan_refsols/correl_12.txt index 84f30b21f..3fceabdc2 100644 --- a/tests/test_plan_refsols/correl_12.txt +++ b/tests/test_plan_refsols/correl_12.txt @@ -1,7 +1,7 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, columns={'p_brand': t0.p_brand}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'brand_avg_price': t1.brand_avg_price, 'global_avg_price': t0.global_avg_price, 'p_brand': t1.p_brand}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'brand_avg_price': t1.brand_avg_price, 'global_avg_price': t0.global_avg_price, 'p_brand': t1.p_brand}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_brand': p_brand}, aggregations={'brand_avg_price': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_13.txt b/tests/test_plan_refsols/correl_13.txt index 035b6138f..5a738e623 100644 --- a/tests/test_plan_refsols/correl_13.txt +++ b/tests/test_plan_refsols/correl_13.txt @@ 
-1,10 +1,10 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index e03669bed..b8d16913a 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,12 +1,12 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index b88f0b314..d821dbbe7 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,15 +1,15 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': 
t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) diff --git a/tests/test_plan_refsols/correl_16.txt b/tests/test_plan_refsols/correl_16.txt index 06cce05d8..f26b6c5e9 100644 --- a/tests/test_plan_refsols/correl_16.txt +++ 
b/tests/test_plan_refsols/correl_16.txt @@ -1,9 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) FILTER(condition=s_nationkey == n_nationkey & PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) PROJECT(columns={'s_nationkey': s_nationkey, 
's_suppkey': s_suppkey, 'tile': PERCENTILE(args=[], partition=[], order=[(s_acctbal):asc_last, (s_suppkey):asc_last], n_buckets=10000)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index b432479aa..7c1c086c4 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ b/tests/test_plan_refsols/correl_17.txt @@ -1,4 +1,4 @@ ROOT(columns=[('fullname', JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name)))], orderings=[(JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 8f05634b3..75910fb98 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', sum_n_above_avg)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={}) FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index 607b17293..453727b40 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,7 +1,7 @@ ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index c0beb2eaa..7bfc095a3 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) - JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 44ce5aa60..29e9f7d0a 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,10 +1,10 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, 
cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=MONTH(o_orderdate) == 6:numeric & YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_21.txt b/tests/test_plan_refsols/correl_21.txt index 82b36b594..8a6cdde8d 100644 --- 
a/tests/test_plan_refsols/correl_21.txt +++ b/tests/test_plan_refsols/correl_21.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) - JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_parts': AVG(n_parts)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/correl_22.txt b/tests/test_plan_refsols/correl_22.txt index 99d34fe66..aad7e12c4 100644 --- a/tests/test_plan_refsols/correl_22.txt +++ b/tests/test_plan_refsols/correl_22.txt @@ -1,6 +1,6 @@ ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first], limit=5:numeric) AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) - JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_container': t1.p_container}) + JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_container': t1.p_container}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_23.txt b/tests/test_plan_refsols/correl_23.txt index 0538d5363..8622ac925 100644 --- a/tests/test_plan_refsols/correl_23.txt +++ b/tests/test_plan_refsols/correl_23.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) - 
JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_combo': AVG(n_combos)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_combos': COUNT()}) AGGREGATE(keys={'p_container': p_container, 'p_size': p_size, 'p_type': p_type}, aggregations={}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 0957f70e7..d5b649cbd 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month), ('n_orders_in_range', n_orders_in_range)], orderings=[(year):asc_first, (month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_orders_in_range': COUNT()}) - JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, columns={'month': t0.month, 'year': t0.year}) + JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) < 1994:numeric, 
columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_25.txt b/tests/test_plan_refsols/correl_25.txt index f34df3cdf..e71563e9c 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -1,10 +1,10 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', n_urgent_semi_domestic_rail_orders)], orderings=[(n_urgent_semi_domestic_rail_orders):desc_last, (anything_c_name):asc_first], limit=5:numeric) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name), 'n_urgent_semi_domestic_rail_orders': NDISTINCT(l_orderkey)}) - JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 
'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) @@ -12,8 +12,8 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regio SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 7a1ba0639..977ddb095 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,10 +1,10 @@ ROOT(columns=[('nation_name', nation_name), 
('n_selected_purchases', n_selected_purchases)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_selected_purchases': COUNT(), 'nation_name': ANYTHING(n_name)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -12,6 +12,6 @@ ROOT(columns=[('nation_name', nation_name), ('n_selected_purchases', n_selected_ FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 2de29c99b..4e8bf9fda 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,16 +1,16 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) + JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 2de29c99b..a4726946f 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,16 +1,16 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) - JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) + JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 
'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) 
SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 7f4bc0b33..fac5389cf 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,10 +1,10 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': 
t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -12,8 +12,8 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': 
s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index 46a93a3d4..efeaf72d2 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=r_regionkey == anything_n_regionkey, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_comment': c_comment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index 1ce81c590..cda02872f 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', agg_3_17)], orderings=[(anything_region_name):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3_17': t1.n_rows, 'anything_n_name': t0.anything_n_name, 'anything_region_name': t0.anything_region_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 
'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -11,9 +11,9 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_supp_acctbal': 
t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index 6da351356..684d60c2f 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,10 +1,10 @@ ROOT(columns=[('nation_name', nation_name), ('mean_rev', mean_rev), ('median_rev', median_rev)], orderings=[(nation_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'mean_rev': AVG(l_extendedprice * 1:numeric - l_discount), 'median_rev': MEDIAN(l_extendedprice * 1:numeric - l_discount), 'nation_name': ANYTHING(n_name)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 
'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, 
columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_32.txt b/tests/test_plan_refsols/correl_32.txt index 70a37ff43..1bd7b713a 100644 --- a/tests/test_plan_refsols/correl_32.txt +++ b/tests/test_plan_refsols/correl_32.txt @@ -1,10 +1,10 @@ ROOT(columns=[('customer_name', c_name), ('delta', ABS(c_acctbal - median_s_acctbal))], orderings=[(ABS(c_acctbal - median_s_acctbal)):asc_first], limit=5:numeric) - JOIN(condition=SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) == t1.expr_1 & t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_name': t0.c_name, 'median_s_acctbal': t1.median_s_acctbal}) + JOIN(condition=SLICE(t0.c_phone, -1:numeric, None:unknown, None:unknown) == t1.expr_1 & t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_name': t0.c_name, 'median_s_acctbal': t1.median_s_acctbal}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'expr_1': SLICE(s_phone, -1:numeric, None:unknown, None:unknown), 'n_nationkey': n_nationkey}, aggregations={'median_s_acctbal': MEDIAN(s_acctbal)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal, 's_phone': t1.s_phone}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 's_acctbal': t1.s_acctbal, 's_phone': t1.s_phone}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'MIDDLE EAST':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_33.txt b/tests/test_plan_refsols/correl_33.txt index 1331a72cf..8e6e60923 100644 --- a/tests/test_plan_refsols/correl_33.txt +++ b/tests/test_plan_refsols/correl_33.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) - JOIN(condition=MONTH(t0.first_order_date) == t1.expr_3 & YEAR(t0.first_order_date) == t1.expr_2, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=MONTH(t0.first_order_date) == t1.expr_3 & YEAR(t0.first_order_date) == t1.expr_2, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) AGGREGATE(keys={}, aggregations={'first_order_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'expr_2': YEAR(o_orderdate), 'expr_3': MONTH(o_orderdate)}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 5ce2487c5..e10964b38 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -2,13 +2,13 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={}) FILTER(condition=l_orderkey == o_orderkey & ps_partkey == 
l_partkey & ps_suppkey == l_suppkey & o_totalprice > RELAVG(args=[o_totalprice], partition=[l_linenumber, l_orderkey, ps_partkey, ps_suppkey], order=[]) | RELSIZE(args=[], partition=[l_partkey, l_suppkey], order=[]) == 1:numeric, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, 
columns={'r_regionkey': r_regionkey}) @@ -17,6 +17,6 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_linestatus': l_linestatus, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_returnflag': l_returnflag, 'l_suppkey': l_suppkey}) FILTER(condition=YEAR(o_orderdate) >= 1995:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index ec4994e6d..96ec04e08 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -1,9 +1,9 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.p_type == t1.p_type & t0.s_nationkey == t1.c_nationkey & t0.o_custkey == t1.c_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) - 
JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type & t0.s_nationkey == t1.c_nationkey & t0.o_custkey == t1.c_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) @@ -12,10 +12,10 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git 
a/tests/test_plan_refsols/correl_36.txt b/tests/test_plan_refsols/correl_36.txt index fd87197e6..bb518918c 100644 --- a/tests/test_plan_refsols/correl_36.txt +++ b/tests/test_plan_refsols/correl_36.txt @@ -1,20 +1,20 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.key_12, type=INNER, cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.key_12, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'key_12': key_12, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}, aggregations={}) - JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': 
t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) - JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) + JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 
'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) @@ -22,7 +22,7 @@ ROOT(columns=[('n', n)], orderings=[]) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_4.txt b/tests/test_plan_refsols/correl_4.txt index 84fb530c3..f8778c6d6 100644 --- a/tests/test_plan_refsols/correl_4.txt +++ b/tests/test_plan_refsols/correl_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', n_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_5.txt b/tests/test_plan_refsols/correl_5.txt index 49ce7f7e3..1ae689fe3 100644 --- a/tests/test_plan_refsols/correl_5.txt +++ b/tests/test_plan_refsols/correl_5.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', name)], orderings=[(name):asc_first]) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'name': ANYTHING(r_name)}) - JOIN(condition=t1.s_acctbal <= t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=t1.s_acctbal <= 
t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index b58490dcb..6f798d580 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', n_rows)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_7.txt 
b/tests/test_plan_refsols/correl_7.txt index d6b8181db..7586ae463 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', 0:numeric)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_8.txt b/tests/test_plan_refsols/correl_8.txt index e1c6b0062..3f1326205 100644 --- a/tests/test_plan_refsols/correl_8.txt +++ b/tests/test_plan_refsols/correl_8.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', r_name)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': 
r_regionkey}) diff --git a/tests/test_plan_refsols/correl_9.txt b/tests/test_plan_refsols/correl_9.txt index 09b34ed37..ebcfe8b06 100644 --- a/tests/test_plan_refsols/correl_9.txt +++ b/tests/test_plan_refsols/correl_9.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', n_name), ('rname', r_name)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown) & t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt index ed98cb596..38615974e 100644 --- a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt +++ b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('n_top_suppliers', DEFAULT_TO(count_s_suppkey, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, 
aggregations={'count_s_suppkey': COUNT(s_suppkey)}) LIMIT(limit=100:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index f8c87d703..3c1b317ec 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,14 +1,14 @@ ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', sum_sum_sum_sum_agg_0), ('total_value', DEFAULT_TO(sum_sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': year}, aggregations={'sum_sum_sum_sum_agg_0': SUM(sum_sum_sum_agg_0), 'sum_sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': 
t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt index af5e3ba37..223617b10 100644 --- a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt +++ b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/count_single_subcollection.txt b/tests/test_plan_refsols/count_single_subcollection.txt index bc53fc653..2934bd9b0 100644 --- a/tests/test_plan_refsols/count_single_subcollection.txt +++ b/tests/test_plan_refsols/count_single_subcollection.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', num_customers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': t1.num_customers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': t1.num_customers}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'num_customers': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/cumulative_stock_analysis.txt b/tests/test_plan_refsols/cumulative_stock_analysis.txt index 79ad18038..98ef1fa97 100644 --- a/tests/test_plan_refsols/cumulative_stock_analysis.txt +++ b/tests/test_plan_refsols/cumulative_stock_analysis.txt @@ -1,5 +1,5 @@ ROOT(columns=[('date_time', 
sbTxDateTime), ('txn_within_day', RELSIZE(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('n_buys_within_day', RELCOUNT(args=[KEEP_IF(sbTxType, sbTxType == 'buy':string)], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(sbTxDateTime):asc_last], cumulative=True)), ('pct_apple_txns', ROUND(100.0:numeric * RELSUM(args=[ISIN(sbTickerSymbol, ['AAPL', 'AMZN']:array[unknown])], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True) / RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric)), ('share_change', RELSUM(args=[IFF(sbTxType == 'buy':string, sbTxShares, 0:numeric - sbTxShares)], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True)), ('rolling_avg_amount', ROUND(RELAVG(args=[sbTxAmount], partition=[], order=[(sbTxDateTime):asc_last], cumulative=True), 2:numeric))], orderings=[(sbTxDateTime):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxAmount': t0.sbTxAmount, 'sbTxDateTime': t0.sbTxDateTime, 'sbTxShares': t0.sbTxShares, 'sbTxType': t0.sbTxType}) FILTER(condition=MONTH(sbTxDateTime) == 4:numeric & YEAR(sbTxDateTime) == 2023:numeric & sbTxStatus == 'success':string, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxAmount': sbTxAmount, 'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares, 'sbTxStatus': sbTxStatus, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTicker, 
columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index 16700f289..f147342e9 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, min_diff, max_diff))], orderings=[(IFF(ABS(min_diff) > max_diff, min_diff, max_diff)):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': 
t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index 5f000a001..c9265fae6 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/customers_sum_line_price.txt b/tests/test_plan_refsols/customers_sum_line_price.txt index 5b37d11f5..082a9e3e7 100644 --- a/tests/test_plan_refsols/customers_sum_line_price.txt +++ b/tests/test_plan_refsols/customers_sum_line_price.txt @@ -1,8 +1,8 @@ ROOT(columns=[('okey', c_custkey), ('lsum', 
DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'o_custkey': t0.o_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index 5e509bcd6..add80975e 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,24 +1,24 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 
'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 
'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': 
t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_suppkey': s_suppkey}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'r_regionkey': r_regionkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 
's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/double_cross.txt b/tests/test_plan_refsols/double_cross.txt index 8a8bc62c0..0b5722202 100644 --- a/tests/test_plan_refsols/double_cross.txt +++ b/tests/test_plan_refsols/double_cross.txt @@ -1,9 +1,9 @@ ROOT(columns=[('wk', ord_wk), ('n_lines', n_rows), ('n_orders', anything_n_orders), ('lpo', ROUND(RELSUM(args=[n_rows], partition=[], 
order=[(line_wk):asc_last], cumulative=True) / RELSUM(args=[anything_n_orders], partition=[], order=[(ord_wk):asc_last], cumulative=True), 4:numeric))], orderings=[(ord_wk):asc_first]) AGGREGATE(keys={'line_wk': DATEDIFF('week':string, min_date, l_receiptdate), 'ord_wk': ord_wk}, aggregations={'anything_n_orders': ANYTHING(n_orders), 'n_rows': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'min_date': t1.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'min_date': t1.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) AGGREGATE(keys={'ord_wk': DATEDIFF('week':string, min_date, o_orderdate)}, aggregations={'n_orders': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': 
o_orderdate}) FILTER(condition=o_orderpriority == '1-URGENT':string & o_orderstatus == 'F':string, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/dumb_aggregation.txt b/tests/test_plan_refsols/dumb_aggregation.txt index 2a9906b01..e8c22ab3d 100644 --- a/tests/test_plan_refsols/dumb_aggregation.txt +++ b/tests/test_plan_refsols/dumb_aggregation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('a1', r_name), ('a2', r_name), ('a3', DEFAULT_TO(r_regionkey, 0:numeric)), ('a4', IFF(PRESENT(KEEP_IF(r_regionkey, r_name != 'AMERICA':string)), 1:numeric, 0:numeric)), ('a5', 1:numeric), ('a6', r_regionkey), ('a7', r_name), ('a8', r_regionkey)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) LIMIT(limit=2:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 6594b25a9..63f0f6776 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,13 +1,13 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first], limit=6:numeric) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': 
t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, 
columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_event_gap_per_era.txt b/tests/test_plan_refsols/epoch_event_gap_per_era.txt index 9859f20d3..e766e591b 100644 --- a/tests/test_plan_refsols/epoch_event_gap_per_era.txt +++ b/tests/test_plan_refsols/epoch_event_gap_per_era.txt @@ -2,6 +2,6 @@ ROOT(columns=[('era_name', er_name), ('avg_event_gap', avg_event_gap)], ordering AGGREGATE(keys={'er_name': er_name}, aggregations={'anything_er_start_year': ANYTHING(er_start_year), 'avg_event_gap': AVG(day_gap)}) FILTER(condition=er_start_year <= YEAR(ev_dt) & YEAR(ev_dt) < er_end_year, columns={'day_gap': day_gap, 'er_name': er_name, 'er_start_year': er_start_year}) PROJECT(columns={'day_gap': DATEDIFF('days':string, PREV(args=[ev_dt], partition=[er_name, er_name], order=[(ev_dt):asc_last]), ev_dt), 'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year, 'ev_dt': ev_dt}) - JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, columns={'er_end_year': t0.er_end_year, 'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt}) + JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'er_end_year': t0.er_end_year, 'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git a/tests/test_plan_refsols/epoch_events_per_season.txt b/tests/test_plan_refsols/epoch_events_per_season.txt index eaf3e6df1..3e35da6bd 100644 --- a/tests/test_plan_refsols/epoch_events_per_season.txt +++ b/tests/test_plan_refsols/epoch_events_per_season.txt @@ -1,5 +1,5 @@ ROOT(columns=[('season_name', s_name), ('n_events', n_events)], orderings=[(n_events):desc_last, (s_name):asc_first]) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_events': COUNT()}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git a/tests/test_plan_refsols/epoch_first_event_per_era.txt b/tests/test_plan_refsols/epoch_first_event_per_era.txt index 14940bc77..a4a1793d0 100644 --- a/tests/test_plan_refsols/epoch_first_event_per_era.txt +++ b/tests/test_plan_refsols/epoch_first_event_per_era.txt @@ -1,5 +1,5 @@ ROOT(columns=[('era_name', er_name), ('event_name', ev_name)], orderings=[(er_start_year):asc_first]) FILTER(condition=RANKING(args=[], partition=[er_name], order=[(ev_dt):asc_last], allow_ties=False) == 1:numeric, columns={'er_name': er_name, 'er_start_year': er_start_year, 'ev_name': ev_name}) - JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & 
YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt, 'ev_name': t1.ev_name}) + JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt, 'ev_name': t1.ev_name}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index dbbfed385..471d3e119 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,22 +1,22 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) + JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, 
columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + 
JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, columns={'name_9': t1.s_name, 's_name': t0.s_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt b/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt index 65a82f60a..9473b9fc4 100644 --- a/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt +++ b/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt @@ -1,6 +1,6 @@ ROOT(columns=[('tod', t_name), ('search_engine', search_engine), ('n_searches', n_searches)], orderings=[(t_name):asc_first]) FILTER(condition=RANKING(args=[], partition=[t_name], order=[(n_searches):desc_first, (search_engine):asc_last], allow_ties=False) == 1:numeric, columns={'n_searches': n_searches, 'search_engine': search_engine, 't_name': t_name}) AGGREGATE(keys={'search_engine': search_engine, 't_name': t_name}, aggregations={'n_searches': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_engine': t1.search_engine, 't_name': t0.t_name}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'search_engine': t1.search_engine, 't_name': t0.t_name}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, 
columns={'search_engine': search_engine, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt b/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt index 5c5133e8c..2638541a6 100644 --- a/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt +++ b/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt @@ -1,8 +1,8 @@ ROOT(columns=[('region', user_region), ('event_type', ev_typ), ('n_searches', n_searches)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[user_region], order=[(n_searches):desc_first], allow_ties=False) == 1:numeric, columns={'ev_typ': ev_typ, 'n_searches': n_searches, 'user_region': user_region}) AGGREGATE(keys={'ev_typ': ev_typ, 'user_region': user_region}, aggregations={'n_searches': NDISTINCT(search_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ev_typ': t0.ev_typ, 'search_id': t0.search_id, 'user_region': t1.user_region}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t1.search_id, 'search_user_id': t1.search_user_id}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t0.search_id, 'user_region': t1.user_region}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t1.search_id, 'search_user_id': t1.search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=USERS, columns={'user_id': user_id, 'user_region': user_region}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt 
b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index 09744a70d..225fc5ac2 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n_events', n_events)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_events': COUNT()}) - JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, columns={}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key}) + JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_key': t0.ev_key}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt 
b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index 2bd8dd3ae..3c0b8dc3a 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -1,9 +1,9 @@ ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users)], orderings=[(n_other_users):desc_last, (anything_user_name):asc_first], limit=7:numeric) AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'n_other_users': NDISTINCT(user_id_11)}) - JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index f7b32c767..b918f541e 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -2,10 +2,10 @@ ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_searc AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_searches': COUNT()}) FILTER(condition=user_id == anything_search_user_id, columns={'anything_user_name': anything_user_name, 'user_id': user_id}) AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id), 'anything_user_name': ANYTHING(user_name)}) - JOIN(condition=t1.user_name != t0.user_name & t0.user_id_8 == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_id_8': t1.search_user_id, 'user_name': t0.user_name}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), 
LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t1.user_name != t0.user_name & t0.user_id_8 == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_id_8': t1.search_user_id, 'user_name': t0.user_name}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt index 
945bd82c2..88b48990a 100644 --- a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt +++ b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, columns={'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_search_results_by_tod.txt b/tests/test_plan_refsols/epoch_search_results_by_tod.txt index 7334ed1ff..a4f00ca05 100644 --- a/tests/test_plan_refsols/epoch_search_results_by_tod.txt +++ b/tests/test_plan_refsols/epoch_search_results_by_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)), ('avg_results', ROUND(avg_search_num_results, 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': 
t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_summer_events_per_type.txt b/tests/test_plan_refsols/epoch_summer_events_per_type.txt index d5e60f4f4..619b2e3a9 100644 --- a/tests/test_plan_refsols/epoch_summer_events_per_type.txt +++ b/tests/test_plan_refsols/epoch_summer_events_per_type.txt @@ -1,6 +1,6 @@ ROOT(columns=[('event_type', ev_typ), ('n_events', n_events)], orderings=[(ev_typ):asc_first]) AGGREGATE(keys={'ev_typ': ev_typ}, aggregations={'n_events': COUNT()}) - JOIN(condition=MONTH(t0.ev_dt) == t1.first_month | MONTH(t0.ev_dt) == t1.second_month | MONTH(t0.ev_dt) == t1.third_month, type=INNER, cardinality=SINGULAR_FILTER, columns={'ev_typ': t0.ev_typ}) + JOIN(condition=MONTH(t0.ev_dt) == t1.first_month | MONTH(t0.ev_dt) == t1.second_month | MONTH(t0.ev_dt) == t1.third_month, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_typ': t0.ev_typ}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_typ': ev_typ}) FILTER(condition=s_name == 'Summer':string, columns={'first_month': s_month1, 'second_month': s_month2, 'third_month': s_month3}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt index 8e972942e..09e213e3f 100644 --- a/tests/test_plan_refsols/epoch_unique_users_per_engine.txt +++ b/tests/test_plan_refsols/epoch_unique_users_per_engine.txt @@ 
-1,9 +1,9 @@ ROOT(columns=[('engine', search_engine), ('n_users', DEFAULT_TO(ndistinct_user_id, 0:numeric))], orderings=[(search_engine):asc_first]) - JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) + JOIN(condition=t0.search_engine == t1.search_engine, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ndistinct_user_id': t1.ndistinct_user_id, 'search_engine': t0.search_engine}) AGGREGATE(keys={'search_engine': search_engine}, aggregations={}) SCAN(table=SEARCHES, columns={'search_engine': search_engine}) AGGREGATE(keys={'search_engine': search_engine}, aggregations={'ndistinct_user_id': NDISTINCT(user_id)}) - JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) + JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'search_engine': t0.search_engine, 'user_id': t1.user_id}) FILTER(condition=MONOTONIC(2010:numeric, YEAR(search_ts), 2019:numeric), columns={'search_engine': search_engine, 'search_user_id': search_user_id}) SCAN(table=SEARCHES, columns={'search_engine': search_engine, 'search_ts': search_ts, 'search_user_id': search_user_id}) SCAN(table=USERS, columns={'user_id': user_id}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index 32fa17508..3115b76b7 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,10 +1,10 @@ ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first], limit=3:numeric) - JOIN(condition=t0.user_id == 
t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_cold_war_searches': COUNT()}) AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) - JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) + JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index ef68303cf..3842761d5 
100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/function_sampler.txt b/tests/test_plan_refsols/function_sampler.txt index f6aa4aa45..5ab472869 100644 --- a/tests/test_plan_refsols/function_sampler.txt +++ b/tests/test_plan_refsols/function_sampler.txt @@ -1,6 +1,6 @@ ROOT(columns=[('a', JOIN_STRINGS('-':string, r_name, n_name, SLICE(c_name, 16:numeric, None:unknown, None:unknown))), ('b', ROUND(c_acctbal, 1:numeric)), ('c', KEEP_IF(c_name, SLICE(c_phone, None:unknown, 1:numeric, None:unknown) == '3':string)), ('d', PRESENT(KEEP_IF(c_name, SLICE(c_phone, 1:numeric, 2:numeric, None:unknown) == '1':string))), ('e', ABSENT(KEEP_IF(c_name, SLICE(c_phone, 14:numeric, None:unknown, None:unknown) == '7':string))), ('f', ROUND(c_acctbal))], orderings=[(c_address):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=MONOTONIC(0.0:numeric, c_acctbal, 100.0:numeric), columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/global_aggfunc_backref.txt b/tests/test_plan_refsols/global_aggfunc_backref.txt index 1a899ced4..2bfe14f1c 100644 --- a/tests/test_plan_refsols/global_aggfunc_backref.txt +++ b/tests/test_plan_refsols/global_aggfunc_backref.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('is_above_avg', p_retailprice > avg_price)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_price': t0.avg_price, 'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={}, aggregations={'avg_price': AVG(p_retailprice)}) 
SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt b/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt index b4f73f77c..6b8378233 100644 --- a/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt +++ b/tests/test_plan_refsols/global_aggfuncs_multiple_children.txt @@ -1,6 +1,6 @@ ROOT(columns=[('num_cust', num_cust), ('num_supp', num_supp), ('num_part', num_part)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'num_cust': t0.num_cust, 'num_part': t1.num_part, 'num_supp': t0.num_supp}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'num_cust': t0.num_cust, 'num_supp': t1.num_supp}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'num_cust': t0.num_cust, 'num_part': t1.num_part, 'num_supp': t0.num_supp}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'num_cust': t0.num_cust, 'num_supp': t1.num_supp}) AGGREGATE(keys={}, aggregations={'num_cust': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={}) AGGREGATE(keys={}, aggregations={'num_supp': COUNT()}) diff --git a/tests/test_plan_refsols/hour_minute_day.txt b/tests/test_plan_refsols/hour_minute_day.txt index 30ead1632..294688d84 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId), ('_expr0', HOUR(sbTxDateTime)), ('_expr1', MINUTE(sbTxDateTime)), ('_expr2', SECOND(sbTxDateTime))], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git a/tests/test_plan_refsols/join_asia_region_nations.txt b/tests/test_plan_refsols/join_asia_region_nations.txt index 86200b10a..78532832c 100644 --- a/tests/test_plan_refsols/join_asia_region_nations.txt +++ b/tests/test_plan_refsols/join_asia_region_nations.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_order_by.txt b/tests/test_plan_refsols/join_order_by.txt index 0d23a654e..e84c9ff03 100644 --- a/tests/test_plan_refsols/join_order_by.txt +++ b/tests/test_plan_refsols/join_order_by.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):desc_last]) - 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_order_by_back_reference.txt b/tests/test_plan_refsols/join_order_by_back_reference.txt index f92cd6f15..c2fa8442c 100644 --- a/tests/test_plan_refsols/join_order_by_back_reference.txt +++ b/tests/test_plan_refsols/join_order_by_back_reference.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(r_name):desc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt b/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt index c3325f51b..ad9495668 100644 --- a/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt +++ b/tests/test_plan_refsols/join_order_by_pruned_back_reference.txt @@ -1,4 +1,4 @@ ROOT(columns=[('nation_name', n_name)], orderings=[(r_name):desc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_region_nations.txt b/tests/test_plan_refsols/join_region_nations.txt index 7e6add7b1..c319d0a25 100644 --- a/tests/test_plan_refsols/join_region_nations.txt +++ b/tests/test_plan_refsols/join_region_nations.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_region_nations_customers.txt b/tests/test_plan_refsols/join_region_nations_customers.txt index 66c4c5236..03148c27c 100644 --- a/tests/test_plan_refsols/join_region_nations_customers.txt +++ b/tests/test_plan_refsols/join_region_nations_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', c_custkey), ('name', c_name), ('address', c_address), ('nation_key', c_nationkey), ('phone', c_phone), ('account_balance', c_acctbal), ('market_segment', c_mktsegment), ('comment', c_comment)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_comment': 
t1.c_comment, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'c_phone': t1.c_phone}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_comment': t1.c_comment, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'c_phone': t1.c_phone}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/join_topk.txt b/tests/test_plan_refsols/join_topk.txt index f92640023..b5fed4ca0 100644 --- a/tests/test_plan_refsols/join_topk.txt +++ b/tests/test_plan_refsols/join_topk.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(n_name):asc_last], limit=10:numeric) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': 
n_regionkey}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments.txt b/tests/test_plan_refsols/lineitem_regional_shipments.txt index 7a9764497..ba505efe5 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments.txt @@ -1,17 +1,17 @@ ROOT(columns=[('rname', r_name), ('price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments2.txt b/tests/test_plan_refsols/lineitem_regional_shipments2.txt index 711bcecdf..b79ec8de3 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments2.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments2.txt @@ -1,17 +1,17 @@ ROOT(columns=[('rname', r_name), ('price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'r_name': t1.r_name}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'r_name': t1.r_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.s_nationkey == 
t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments3.txt b/tests/test_plan_refsols/lineitem_regional_shipments3.txt index 80e53fe22..92972470c 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments3.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'r_comment': t1.r_comment, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'r_name': t0.r_name}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_custkey': t1.o_custkey, 'r_name': t0.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': 
t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_comment': t1.r_comment, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'r_name': t0.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index c9f1900fe..589daa0bc 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,14 +1,14 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': 
l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 
'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index 57f92d814..d8e5cd342 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -1,14 +1,14 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=STARTSWITH(p_type, 'ECONOMY':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt index e06f975c8..f1caafb88 100644 --- a/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt +++ b/tests/test_plan_refsols/lines_shipping_vs_customer_region.txt @@ -1,17 +1,17 @@ ROOT(columns=[('order_year', YEAR(o_orderdate)), ('customer_region_name', r_name), ('customer_nation_name', n_name), ('supplier_region_name', supplier_region_name), ('nation_name', nation_name)], orderings=[]) - 
JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'nation_name': t1.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name, 'supplier_region_name': t1.r_name}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'r_name': t0.r_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': 
t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'r_name': t1.r_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 0bce6d3db..132a9adcc 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) + JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) diff --git 
a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt index 105e87e4a..84d08983f 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt index 68339d2fd..6b901bb6d 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', count_s_suppkey), ('total_suppliers', count_s_suppkey)], orderings=[]) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 9f4f4ddaa..2ab7abe26 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index ea1267de4..00be4afea 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,25 +1,25 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', cust_avg_shares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t0.sbTxCustId == 
t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': 
t0.sbTxId, 'sbTxType': t0.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) + JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, 
columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + 
JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index b58741ac6..2cd523e09 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,19 +1,19 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == 
t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpTickerId': t1.sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpTickerId': t1.sbDpTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & 
t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 9b5f5003a..2efeb070b 100644 --- 
a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_ticker_max_shares': MAX(sbTxShares)}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 5aea3ab12..3356a5d74 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,13 +1,13 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', 
sum_n_ticker_type_trans_1), ('n_type_trans', sum_n_ticker_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) - JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) + JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index 3da2dfe6f..e793349dc 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -1,47 +1,47 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, 
cardinality=PLURAL_FILTER, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_cust_type_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxType': 
sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) 
SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/multiple_has_hasnot.txt b/tests/test_plan_refsols/multiple_has_hasnot.txt index ba13f4825..ec80d0ec2 100644 --- a/tests/test_plan_refsols/multiple_has_hasnot.txt +++ b/tests/test_plan_refsols/multiple_has_hasnot.txt @@ -1,22 +1,22 @@ ROOT(columns=[('name', p_name)], orderings=[]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 
's_nationkey': t1.s_nationkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'ARGENTINA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index afbab2010..18e86d4d9 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal_1': AVG(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index 2e2b636f1..a835cca09 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) 
AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 977b01033..23594226f 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, 
columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index caee91153..a204db9ef 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -1,12 +1,12 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', o_orderkey), ('order_value', o_totalprice), ('value_percentage', value_percentage)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': t1.value_percentage}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': t1.value_percentage}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) 
PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/nation_name_contains_region_name.txt b/tests/test_plan_refsols/nation_name_contains_region_name.txt index 1dedfd1e3..2244b257e 100644 --- a/tests/test_plan_refsols/nation_name_contains_region_name.txt +++ b/tests/test_plan_refsols/nation_name_contains_region_name.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': 
t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/nations_access_region.txt b/tests/test_plan_refsols/nations_access_region.txt index d8bdd58af..f9fef55af 100644 --- a/tests/test_plan_refsols/nations_access_region.txt +++ b/tests/test_plan_refsols/nations_access_region.txt @@ -1,4 +1,4 @@ ROOT(columns=[('nation_name', n_name), ('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt index 3a35369e0..81fcffc76 100644 --- a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt +++ b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/nations_region_order_by_name.txt b/tests/test_plan_refsols/nations_region_order_by_name.txt index 0d2e21720..951583d9a 100644 --- a/tests/test_plan_refsols/nations_region_order_by_name.txt +++ b/tests/test_plan_refsols/nations_region_order_by_name.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(n_name):asc_last, (r_name):asc_last]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nations_sum_line_price.txt b/tests/test_plan_refsols/nations_sum_line_price.txt index 2e514641b..7ec5765c3 100644 --- a/tests/test_plan_refsols/nations_sum_line_price.txt +++ b/tests/test_plan_refsols/nations_sum_line_price.txt @@ -1,9 +1,9 @@ ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index b7f84c0f5..add0e0f3c 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', 
total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/odate_and_rdate_avggap.txt b/tests/test_plan_refsols/odate_and_rdate_avggap.txt index a556f2e22..84a9d4f04 100644 --- a/tests/test_plan_refsols/odate_and_rdate_avggap.txt +++ b/tests/test_plan_refsols/odate_and_rdate_avggap.txt @@ -1,6 +1,6 @@ ROOT(columns=[('avg_gap', avg_gap)], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_gap': AVG(DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate)))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_commitdate': t0.l_commitdate, 
'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/order_by_before_join.txt b/tests/test_plan_refsols/order_by_before_join.txt index 7e6add7b1..c319d0a25 100644 --- a/tests/test_plan_refsols/order_by_before_join.txt +++ b/tests/test_plan_refsols/order_by_before_join.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/ordered_asian_nations.txt b/tests/test_plan_refsols/ordered_asian_nations.txt index 1dff06885..249b892c5 100644 --- a/tests/test_plan_refsols/ordered_asian_nations.txt +++ b/tests/test_plan_refsols/ordered_asian_nations.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(n_name):asc_last]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index c3c7ca09c..e0220e6ff 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 139b7720a..455520f65 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ 
b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 568ad6059..915d68fb4 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,9 +1,9 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 
'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'VIETNAM':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index 3044ced65..d7c5bd416 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,10 +1,10 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0': page_rank_0, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 
'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': 
t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index fb470dd74..9f476f893 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,14 +1,14 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_20, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_20': page_rank_0_20, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_20': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, 
columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source 
!= l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index ebb6c114c..e67d91867 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,30 +1,30 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_590, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_590': page_rank_0_590, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_590': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_580 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_580': t0.page_rank_0_580, 'sum_n_target': 
t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_580': t0.page_rank_0_580, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_580': t0.page_rank_0_580, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_580': page_rank_0_580, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_580': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_570 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_570': t0.page_rank_0_570, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_570': t0.page_rank_0_570, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + 
JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_570': t0.page_rank_0_570, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_570': page_rank_0_570, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_570': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_560 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_560': t0.page_rank_0_560, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_560': t0.page_rank_0_560, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_560': t0.page_rank_0_560, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), 
columns={'anything_n': anything_n, 'page_rank_0_560': page_rank_0_560, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_560': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_550 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_550': t0.page_rank_0_550, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_550': t0.page_rank_0_550, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_550': t0.page_rank_0_550, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_550': page_rank_0_550, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_550': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / 
DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': 
t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index 61662e907..a55bcc47a 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ 
b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,18 +1,18 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_58, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_58': page_rank_0_58, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_58': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_48 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_48': t0.page_rank_0_48, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_48': t0.page_rank_0_48, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_48': t0.page_rank_0_48, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_48': page_rank_0_48, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_48': 0.15:numeric / anything_n + 
0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 
'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index 3bd771724..cf9b305ab 100644 
--- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,22 +1,22 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_134, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_134': page_rank_0_134, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_134': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_124 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_124': t0.page_rank_0_124, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_124': t0.page_rank_0_124, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_124': t0.page_rank_0_124, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_124': page_rank_0_124, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 
'l_target': l_target, 'page_rank_0_124': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_114 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_114': t0.page_rank_0_114, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_114': t0.page_rank_0_114, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_114': t0.page_rank_0_114, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_114': page_rank_0_114, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_114': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': 
t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index 409149b59..382de6bab 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,26 +1,26 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_286, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), 
columns={'page_rank_0_286': page_rank_0_286, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_286': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_276 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_276': t0.page_rank_0_276, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_276': t0.page_rank_0_276, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_276': t0.page_rank_0_276, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_276': page_rank_0_276, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_276': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_266 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - 
JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_266': t0.page_rank_0_266, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_266': t0.page_rank_0_266, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_266': t0.page_rank_0_266, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_266': page_rank_0_266, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_266': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_256 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_256': t0.page_rank_0_256, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_256': t0.page_rank_0_256, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_256': t0.page_rank_0_256, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_256': page_rank_0_256, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_256': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 
'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': 
t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index 6c7fe4d18..8551440bf 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -1,38 +1,38 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_2414, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank_0_2414': page_rank_0_2414, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank_0_2414': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2404 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key, 
'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2404': t0.page_rank_0_2404, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2404': page_rank_0_2404, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2404': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2394 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2394': t0.page_rank_0_2394, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2394': page_rank_0_2394, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2394': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2384 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2384': t0.page_rank_0_2384, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2384': page_rank_0_2384, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2384': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2374 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2374': t0.page_rank_0_2374, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2374': page_rank_0_2374, 's_key': 
s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2374': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2364 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2364': t0.page_rank_0_2364, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2364': page_rank_0_2364, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2364': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0_2354 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 
's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0_2354': t0.page_rank_0_2354, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0_2354': page_rank_0_2354, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0_2354': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank_0 / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key 
== t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank_0': t0.page_rank_0, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank_0': t0.page_rank_0, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank_0': page_rank_0, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank_0': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, 
columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/part_cross_part_a.txt b/tests/test_plan_refsols/part_cross_part_a.txt index 94cb27eb1..c4402eb78 100644 --- a/tests/test_plan_refsols/part_cross_part_a.txt +++ b/tests/test_plan_refsols/part_cross_part_a.txt @@ -1,14 +1,14 @@ ROOT(columns=[('state', sbCustState), ('exchange', sbTickerExchange), ('n', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(sbCustState):asc_first, (sbTickerExchange):asc_first]) AGGREGATE(keys={'sbCustState': sbCustState, 'sbTickerExchange': sbTickerExchange}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.sbTickerExchange == 
t1.sbTickerExchange & t0.sbCustId == t1.sbCustId, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbCustState': t1.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=t0.sbTickerExchange == t1.sbTickerExchange & t0.sbCustId == t1.sbCustId, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbCustState': t1.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId, 'sbCustState': sbCustState}) AGGREGATE(keys={'sbCustId': sbCustId, 'sbTickerExchange': sbTickerExchange}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) + 
JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) diff --git a/tests/test_plan_refsols/part_cross_part_b.txt b/tests/test_plan_refsols/part_cross_part_b.txt index 8685ad920..f32335bba 100644 --- a/tests/test_plan_refsols/part_cross_part_b.txt +++ b/tests/test_plan_refsols/part_cross_part_b.txt @@ -1,15 +1,15 @@ ROOT(columns=[('state', sbCustState), ('month_of_year', month), ('n', RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[sbCustState], order=[(month):asc_last], cumulative=True))], orderings=[(sbCustState):asc_first, (month):asc_first]) - JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': 
DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) diff --git a/tests/test_plan_refsols/part_cross_part_c.txt b/tests/test_plan_refsols/part_cross_part_c.txt index 3f025b594..9062336ea 100644 --- a/tests/test_plan_refsols/part_cross_part_c.txt +++ b/tests/test_plan_refsols/part_cross_part_c.txt @@ -1,16 +1,16 @@ ROOT(columns=[('state', sbCustState), 
('max_n', max_n)], orderings=[]) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_n': MAX(DEFAULT_TO(n_rows, 0:numeric))}) - JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': 
t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) AGGREGATE(keys={'month': DATETIME(sbTxDateTime, 'start of month':string)}, aggregations={}) diff --git a/tests/test_plan_refsols/part_reduced_size.txt b/tests/test_plan_refsols/part_reduced_size.txt index 15f3a6a3e..68b341aaa 100644 --- a/tests/test_plan_refsols/part_reduced_size.txt +++ b/tests/test_plan_refsols/part_reduced_size.txt @@ -1,5 +1,5 @@ ROOT(columns=[('reduced_size', FLOAT(p_size / 2.5:numeric)), ('retail_price_int', INTEGER(p_retailprice)), ('message', JOIN_STRINGS('':string, 'old size: ':string, STRING(p_size))), ('discount', l_discount), ('date_dmy', STRING(l_receiptdate, '%d-%m-%Y':string)), ('date_md', STRING(l_receiptdate, '%m/%d':string)), ('am_pm', STRING(l_receiptdate, '%H:%M%p':string))], orderings=[(l_discount):desc_last], limit=5:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_retailprice': t0.p_retailprice, 'p_size': t0.p_size}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_receiptdate': t1.l_receiptdate, 'p_retailprice': t0.p_retailprice, 'p_size': t0.p_size}) LIMIT(limit=2:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}, orderings=[(INTEGER(p_retailprice)):asc_first]) 
SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_partkey': l_partkey, 'l_receiptdate': l_receiptdate}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index 99a8d19a6..be38b3dc5 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,16 +1,16 @@ ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, 
columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipmode': l_shipmode}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/percentile_customers_per_region.txt b/tests/test_plan_refsols/percentile_customers_per_region.txt index 7e1c81b55..0000648c9 100644 --- a/tests/test_plan_refsols/percentile_customers_per_region.txt +++ b/tests/test_plan_refsols/percentile_customers_per_region.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name)], orderings=[(c_name):asc_first]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(c_acctbal):asc_last]) == 95:numeric & 
ENDSWITH(c_phone, '00':string), columns={'c_name': c_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/quantile_function_test_1.txt b/tests/test_plan_refsols/quantile_function_test_1.txt index 51b0be264..c460cafe5 100644 --- a/tests/test_plan_refsols/quantile_function_test_1.txt +++ b/tests/test_plan_refsols/quantile_function_test_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('seventieth_order_price', seventieth_order_price)], orderings=[]) AGGREGATE(keys={}, aggregations={'seventieth_order_price': QUANTILE(o_totalprice, 0.7:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) FILTER(condition=YEAR(o_orderdate) == 
1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 811c4b573..163793d83 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, 
orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 811c4b573..163793d83 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], 
orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': 
t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index 692ccd801..b49c995d4 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,11 +1,11 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': 
t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'agg_0': QUANTILE(o_totalprice, 0.1:numeric), 'agg_1': QUANTILE(o_totalprice, 0.01:numeric), 'agg_2': QUANTILE(o_totalprice, 0.25:numeric), 'agg_3': QUANTILE(o_totalprice, 0.75:numeric), 'agg_4': QUANTILE(o_totalprice, 0.9:numeric), 'agg_5': QUANTILE(o_totalprice, 0.99:numeric), 'agg_6': QUANTILE(o_totalprice, 1.0:numeric), 'agg_7': QUANTILE(o_totalprice, 0.5:numeric), 'agg_8': QUANTILE(o_totalprice, 0.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/rank_customers_per_nation.txt b/tests/test_plan_refsols/rank_customers_per_nation.txt index 76432f5e6..7384d661e 100644 --- a/tests/test_plan_refsols/rank_customers_per_nation.txt +++ 
b/tests/test_plan_refsols/rank_customers_per_nation.txt @@ -1,4 +1,4 @@ ROOT(columns=[('nation_name', n_name), ('name', c_name), ('cust_rank', RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first], allow_ties=True))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_customers_per_region.txt b/tests/test_plan_refsols/rank_customers_per_region.txt index ab8ee8d7d..272d3f0a6 100644 --- a/tests/test_plan_refsols/rank_customers_per_region.txt +++ b/tests/test_plan_refsols/rank_customers_per_region.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_name), ('name', c_name), ('cust_rank', RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=True, dense=True))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey}) + 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_nations_by_region.txt b/tests/test_plan_refsols/rank_nations_by_region.txt index 68631cbf6..7dbc6d2bf 100644 --- a/tests/test_plan_refsols/rank_nations_by_region.txt +++ b/tests/test_plan_refsols/rank_nations_by_region.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[], order=[(r_name):asc_last], allow_ties=True))], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 1e519b6c2..60f1383e8 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt index 7fd4a68da..0cbd4c887 100644 --- a/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt +++ b/tests/test_plan_refsols/rank_parts_per_supplier_region_by_size.txt @@ -1,8 +1,8 @@ ROOT(columns=[('key', p_partkey), ('region', r_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(p_size):desc_first, (p_container):desc_first, (p_type):desc_first], allow_ties=True, dense=True))], orderings=[(p_partkey):asc_first], limit=15:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 
'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_container': t1.p_container, 'p_partkey': t1.p_partkey, 'p_size': t1.p_size, 'p_type': t1.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index 52ebabdc7..f985be11c 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,6 +1,6 @@ ROOT(columns=[('pname', p_name), ('psize', size_3)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[p_size], 
order=[(p_retailprice):desc_first]) == 1:numeric, columns={'p_name': p_name, 'size_3': size_3}) - JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) + JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) SCAN(table=tpch.PART, columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index d58e9d39e..88446e286 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) 
SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/region_nation_window_aggs.txt b/tests/test_plan_refsols/region_nation_window_aggs.txt index a748f74d8..0f1fb8476 100644 --- a/tests/test_plan_refsols/region_nation_window_aggs.txt +++ b/tests/test_plan_refsols/region_nation_window_aggs.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])), ('n_nations', RELSIZE(args=[], partition=[n_regionkey], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/region_nations_backref.txt b/tests/test_plan_refsols/region_nations_backref.txt index f24e02fc4..4b66a2f19 100644 --- a/tests/test_plan_refsols/region_nations_backref.txt +++ b/tests/test_plan_refsols/region_nations_backref.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index 19af8caf1..3a4c4a90a 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -1,10 +1,10 @@ ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, 
cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey}) FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/regional_first_order_best_line_part.txt b/tests/test_plan_refsols/regional_first_order_best_line_part.txt index 734a95f90..3f1e334ca 100644 --- a/tests/test_plan_refsols/regional_first_order_best_line_part.txt +++ 
b/tests/test_plan_refsols/regional_first_order_best_line_part.txt @@ -1,12 +1,12 @@ ROOT(columns=[('region_name', r_name), ('part_name', p_name)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(l_quantity):desc_first, (l_linenumber):asc_last], allow_ties=False) == 1:numeric, columns={'l_partkey': l_partkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'n_regionkey': t0.n_regionkey}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index f3c756a0d..d968b9e59 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', s_name)], orderings=[]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/regions_sum_line_price.txt b/tests/test_plan_refsols/regions_sum_line_price.txt index 7b66ff2ea..a8f014f55 100644 --- a/tests/test_plan_refsols/regions_sum_line_price.txt +++ b/tests/test_plan_refsols/regions_sum_line_price.txt @@ -1,10 +1,10 @@ ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_regionkey': t0.n_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/replace_order_by.txt b/tests/test_plan_refsols/replace_order_by.txt index f92cd6f15..c2fa8442c 100644 --- a/tests/test_plan_refsols/replace_order_by.txt +++ b/tests/test_plan_refsols/replace_order_by.txt @@ -1,4 +1,4 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name)], orderings=[(r_name):desc_last]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': 
n_regionkey}) diff --git a/tests/test_plan_refsols/richest_customer_per_region.txt b/tests/test_plan_refsols/richest_customer_per_region.txt index c6ecbf9f5..426377814 100644 --- a/tests/test_plan_refsols/richest_customer_per_region.txt +++ b/tests/test_plan_refsols/richest_customer_per_region.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('customer_name', c_name), ('balance', c_acctbal)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'n_name': n_name, 'r_name': r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 
8b83d01e6..9d44d3f56 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/semi_singular.txt b/tests/test_plan_refsols/semi_singular.txt index dbf9d3306..675513cc3 100644 --- a/tests/test_plan_refsols/semi_singular.txt +++ b/tests/test_plan_refsols/semi_singular.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), 
('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_anti_1.txt b/tests/test_plan_refsols/simple_anti_1.txt index 7537fb24b..1b99d8c05 100644 --- a/tests/test_plan_refsols/simple_anti_1.txt +++ b/tests/test_plan_refsols/simple_anti_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_anti_2.txt b/tests/test_plan_refsols/simple_anti_2.txt index 94a3573e4..322e6b23c 100644 --- a/tests/test_plan_refsols/simple_anti_2.txt +++ b/tests/test_plan_refsols/simple_anti_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_cross_1.txt b/tests/test_plan_refsols/simple_cross_1.txt index 69e8090c6..77eee16bc 100644 --- a/tests/test_plan_refsols/simple_cross_1.txt +++ b/tests/test_plan_refsols/simple_cross_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('r1', r_name), ('r2', r2)], orderings=[(r_name):asc_first, (r2):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index 275add951..4113ecab2 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) 
AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'r_regionkey': t0.r_regionkey}) - JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_11.txt b/tests/test_plan_refsols/simple_cross_11.txt index aea66e352..27cff2d37 100644 --- a/tests/test_plan_refsols/simple_cross_11.txt +++ b/tests/test_plan_refsols/simple_cross_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git 
a/tests/test_plan_refsols/simple_cross_12.txt b/tests/test_plan_refsols/simple_cross_12.txt index 247a0b247..e1f60b9e3 100644 --- a/tests/test_plan_refsols/simple_cross_12.txt +++ b/tests/test_plan_refsols/simple_cross_12.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_priority', o_orderpriority), ('market_segment', c_mktsegment)], orderings=[(o_orderpriority):asc_first, (c_mktsegment):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_mktsegment': t1.c_mktsegment, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_mktsegment': t1.c_mktsegment, 'o_orderpriority': t0.o_orderpriority}) AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={}) SCAN(table=tpch.ORDERS, columns={'o_orderpriority': o_orderpriority}) AGGREGATE(keys={'c_mktsegment': c_mktsegment}, aggregations={}) diff --git a/tests/test_plan_refsols/simple_cross_2.txt b/tests/test_plan_refsols/simple_cross_2.txt index 714f36eb7..a5f30f5fa 100644 --- a/tests/test_plan_refsols/simple_cross_2.txt +++ b/tests/test_plan_refsols/simple_cross_2.txt @@ -1,4 +1,4 @@ ROOT(columns=[('r1', r_name), ('r2', r2)], orderings=[(r_name):asc_first, (r2):asc_first]) - JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, columns={'r2': t1.r_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_3.txt b/tests/test_plan_refsols/simple_cross_3.txt index 2876b8657..a694edb4c 100644 --- a/tests/test_plan_refsols/simple_cross_3.txt +++ b/tests/test_plan_refsols/simple_cross_3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_nation', anything_supplier_nation), 
('customer_nation', anything_n_name), ('nation_combinations', n_rows)], orderings=[]) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_supplier_nation': ANYTHING(supplier_nation), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.key_8 == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) - JOIN(condition=t0.key_5 == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_5': t0.key_5, 'key_8': t1.n_nationkey, 'n_name': t1.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.n_name}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, columns={'key_5': t1.r_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, 
columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.key_8 == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.supplier_nation}) + JOIN(condition=t0.key_5 == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'key_5': t0.key_5, 'key_8': t1.n_nationkey, 'n_name': t1.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey, 'supplier_nation': t0.n_name}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_5': t1.r_regionkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_regionkey': t0.r_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index ed891e481..0dd20b88a 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt 
index 20572dfc8..76c51fc6b 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ('best_order_priority_qty', total_qty)], orderings=[(p_size):asc_first]) - JOIN(condition=t0.p_size == t1.p_size, type=LEFT, cardinality=SINGULAR_FILTER, columns={'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size, 'total_qty': t1.total_qty}) + JOIN(condition=t0.p_size == t1.p_size, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size, 'total_qty': t1.total_qty}) LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) @@ -7,9 +7,9 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( PROJECT(columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'total_qty': DEFAULT_TO(sum_l_quantity, 0:numeric)}) FILTER(condition=RANKING(args=[], partition=[p_size], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'sum_l_quantity': sum_l_quantity}) AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'p_size': p_size}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, 
columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) FILTER(condition=STARTSWITH(p_container, 'LG':string), columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_cross_6.txt b/tests/test_plan_refsols/simple_cross_6.txt index bfd9474c3..20c725da5 100644 --- a/tests/test_plan_refsols/simple_cross_6.txt +++ b/tests/test_plan_refsols/simple_cross_6.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_pairs', n_pairs)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_pairs': COUNT()}) - JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': 
o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_7.txt b/tests/test_plan_refsols/simple_cross_7.txt index 370589ba7..78f1f71f4 100644 --- a/tests/test_plan_refsols/simple_cross_7.txt +++ b/tests/test_plan_refsols/simple_cross_7.txt @@ -1,9 +1,9 @@ ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (o_orderkey):asc_first], limit=5:numeric) - JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'o_orderkey': t0.o_orderkey}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) 
FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_8.txt b/tests/test_plan_refsols/simple_cross_8.txt index aee7a052f..b40fe6b0e 100644 --- a/tests/test_plan_refsols/simple_cross_8.txt +++ b/tests/test_plan_refsols/simple_cross_8.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_region', anything_supplier_region), ('customer_region', customer_region), ('region_combinations', region_combinations)], orderings=[]) AGGREGATE(keys={'key_2': key_2, 'r_regionkey': r_regionkey}, aggregations={'anything_supplier_region': ANYTHING(supplier_region), 'customer_region': ANYTHING(r_name), 'region_combinations': COUNT()}) FILTER(condition=name_18 == supplier_region, columns={'key_2': key_2, 'r_name': r_name, 'r_regionkey': r_regionkey, 'supplier_region': supplier_region}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'name_18': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 
'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t1.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'name_18': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t1.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) 
SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -16,8 +16,8 @@ ROOT(columns=[('supplier_region', anything_supplier_region), ('customer_region', SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) FILTER(condition=MONTH(l_shipdate) == 3:numeric & YEAR(l_shipdate) == 1998:numeric, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) FILTER(condition=s_acctbal < 0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_9.txt b/tests/test_plan_refsols/simple_cross_9.txt index a1e3233b6..7a1abb072 100644 --- a/tests/test_plan_refsols/simple_cross_9.txt +++ b/tests/test_plan_refsols/simple_cross_9.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n1', n_name), ('n2', n2)], orderings=[(n_name):asc_first, (n2):asc_first], limit=10:numeric) - JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=PLURAL_FILTER, columns={'n2': t1.n_name, 'n_name': t0.n_name}) - JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n2': t1.n_name, 'n_name': t0.n_name}) + JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_semi_1.txt b/tests/test_plan_refsols/simple_semi_1.txt index a7dcb04b6..98a13ec16 100644 --- a/tests/test_plan_refsols/simple_semi_1.txt +++ b/tests/test_plan_refsols/simple_semi_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_semi_2.txt b/tests/test_plan_refsols/simple_semi_2.txt index c308de1b0..146ebf45e 100644 --- 
a/tests/test_plan_refsols/simple_semi_2.txt +++ b/tests/test_plan_refsols/simple_semi_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size < 10:numeric, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index af3b1bbe0..505bad30b 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('var', pop_var), ('std', pop_std), ('sample_var', sample_var), ('sample_std', sample_std), ('pop_var', pop_var), ('pop_std', pop_std)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) FILTER(condition=ISIN(n_name, ['ALGERIA', 
'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VARIANCE(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VARIANCE(s_acctbal)}) diff --git a/tests/test_plan_refsols/singular1.txt b/tests/test_plan_refsols/singular1.txt index db0c193a7..665f6f40c 100644 --- a/tests/test_plan_refsols/singular1.txt +++ b/tests/test_plan_refsols/singular1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('nation_4_name', n_name)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=n_nationkey == 4:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/singular2.txt b/tests/test_plan_refsols/singular2.txt index dcdbced2c..294c7666f 100644 --- a/tests/test_plan_refsols/singular2.txt +++ b/tests/test_plan_refsols/singular2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', n_name), ('okey', o_orderkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': 
n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=c_custkey == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=o_orderkey == 454791:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index fca6acd8d..e3f6787ca 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index bd2891393..f8ce4600b 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular5.txt b/tests/test_plan_refsols/singular5.txt index bf6465b3d..44b340fcb 100644 --- a/tests/test_plan_refsols/singular5.txt +++ b/tests/test_plan_refsols/singular5.txt @@ -1,12 +1,12 @@ ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anything_l_shipdate)], orderings=[(max_anything_l_shipdate):asc_first, (p_container):asc_first], limit=5:numeric) FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_shipdate': max_anything_l_shipdate, 'p_container': p_container}) AGGREGATE(keys={'p_container': p_container}, aggregations={'max_anything_l_shipdate': MAX(anything_l_shipdate), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) + JOIN(condition=t0.p_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_shipdate': t1.anything_l_shipdate, 'n_rows': t1.n_rows, 'p_container': t0.p_container}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': 
p_partkey}) AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index c118e82c9..a547c658e 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) FILTER(condition=c_nationkey == 
4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 's_nationkey': t1.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 105329e60..9bf93569a 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,11 +1,11 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/singular_anti.txt b/tests/test_plan_refsols/singular_anti.txt index de7e0a463..7de3c3eec 100644 --- a/tests/test_plan_refsols/singular_anti.txt +++ b/tests/test_plan_refsols/singular_anti.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/singular_semi.txt b/tests/test_plan_refsols/singular_semi.txt index 5a46a6430..7c881d169 100644 --- a/tests/test_plan_refsols/singular_semi.txt +++ b/tests/test_plan_refsols/singular_semi.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('region_name', r_name)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/smoke_d.txt b/tests/test_plan_refsols/smoke_d.txt index 1edf329d2..6423c59ff 100644 --- a/tests/test_plan_refsols/smoke_d.txt +++ b/tests/test_plan_refsols/smoke_d.txt @@ -1,6 +1,6 @@ ROOT(columns=[('key', c_custkey), ('a', RANKING(args=[], partition=[], order=[(c_acctbal):asc_last, (c_custkey):asc_last])), ('b', RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last])), ('c', RANKING(args=[], partition=[], order=[(c_mktsegment):asc_last], allow_ties=True)), ('d', RANKING(args=[], partition=[], order=[(c_mktsegment):asc_last], allow_ties=True, dense=True)), ('e', PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last, (c_custkey):asc_last])), ('f', PERCENTILE(args=[], partition=[c_nationkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=12)), ('g', PREV(args=[c_custkey], partition=[], order=[(c_custkey):asc_last])), ('h', PREV(args=[c_custkey], partition=[c_nationkey], order=[(c_custkey):asc_last], n=2, default=-1)), ('i', NEXT(args=[c_custkey], partition=[], order=[(c_custkey):asc_last])), ('j', NEXT(args=[c_custkey], partition=[c_nationkey], order=[(c_custkey):asc_last], n=6000)), ('k', RELSUM(args=[c_acctbal], partition=[c_nationkey], order=[])), ('l', RELSUM(args=[c_acctbal], partition=[], order=[(c_custkey):asc_last], cumulative=True)), ('m', ROUND(RELAVG(args=[c_acctbal], partition=[], order=[]), 2:numeric)), ('n', ROUND(RELAVG(args=[c_acctbal], partition=[c_nationkey], order=[(c_custkey):asc_last], frame=(None, -1)), 2:numeric)), ('o', RELCOUNT(args=[KEEP_IF(c_acctbal, c_acctbal > 0:numeric)], 
partition=[], order=[])), ('p', RELSIZE(args=[], partition=[], order=[]))], orderings=[(c_custkey):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_nationkey': t1.c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_nationkey': t1.c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt index b88dc1c8a..5a772b6e9 100644 --- a/tests/test_plan_refsols/sqlite_udf_combine_strings.txt +++ b/tests/test_plan_refsols/sqlite_udf_combine_strings.txt @@ -1,6 +1,6 @@ ROOT(columns=[('s1', s1), ('s2', s2), ('s3', s3), ('s4', s4)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, columns={'s1': t0.s1, 's2': t0.s2, 's3': t0.s3, 's4': t1.s4}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'s1': t0.s1, 's2': t0.s2, 's3': t1.s3}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s1': t0.s1, 's2': t0.s2, 's3': t0.s3, 's4': t1.s4}) + JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'s1': t0.s1, 's2': t0.s2, 's3': t1.s3}) AGGREGATE(keys={}, aggregations={'s1': COMBINE_STRINGS(r_name), 's2': COMBINE_STRINGS(KEEP_IF(r_name, r_name != 'EUROPE':string), ', ':string)}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) AGGREGATE(keys={}, aggregations={'s3': COMBINE_STRINGS(SLICE(n_name, None:unknown, 1:numeric, None:unknown), '':string)}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index 380033384..2eff260ee 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_cust', n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) PROJECT(columns={'avg_balance': RELAVG(args=[c_acctbal], partition=[n_regionkey], order=[]), 'c_acctbal': c_acctbal, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 978f366b2..3adfbe06d 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 3:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, o_totalprice / 1000000.0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': 
c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index 23a6c9aeb..ca252a32f 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -2,7 +2,7 @@ ROOT(columns=[('p', ROUND(percentage_expr_1, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git 
a/tests/test_plan_refsols/sqlite_udf_nval.txt b/tests/test_plan_refsols/sqlite_udf_nval.txt index 6435249df..67f0c0f2f 100644 --- a/tests/test_plan_refsols/sqlite_udf_nval.txt +++ b/tests/test_plan_refsols/sqlite_udf_nval.txt @@ -1,4 +1,4 @@ ROOT(columns=[('rname', r_name), ('nname', n_name), ('v1', NVAL(args=[n_name, 3:numeric], partition=[], order=[(n_name):asc_last])), ('v2', NVAL(args=[n_name, 1:numeric], partition=[n_regionkey], order=[(n_name):asc_last])), ('v3', NVAL(args=[n_name, 2:numeric], partition=[n_regionkey], order=[(n_name):asc_last], frame=(1, None))), ('v4', NVAL(args=[n_name, 5:numeric], partition=[], order=[(n_name):asc_last], cumulative=True))], orderings=[(r_name):asc_first, (n_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index c0b287617..b2444dc7c 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2:numeric)), ('pct_supp_positive', ROUND(percentage_expr_3, 2:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey 
== t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(POSITIVE(c_acctbal))}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_3': PERCENTAGE(POSITIVE(s_acctbal))}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_acctbal': t1.s_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': 
s_nationkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index e759b898f..b9e46e082 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 
'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 6ec721f0f..58a14443e 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,14 +1,14 @@ ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) - JOIN(condition=t0.s_suppkey 
== t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=s_acctbal >= 0.0:numeric & CONTAINS(s_comment, 'careful':string), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) FILTER(condition=YEAR(l_shipdate) == 1995:numeric & l_shipmode == 'SHIP':string, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode, 'l_suppkey': l_suppkey}) FILTER(condition=CONTAINS(p_name, 'tomato':string) & STARTSWITH(p_container, 'LG':string), columns={'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/suppliers_bal_diffs.txt b/tests/test_plan_refsols/suppliers_bal_diffs.txt index 998e0dde9..5257873f3 100644 --- a/tests/test_plan_refsols/suppliers_bal_diffs.txt +++ b/tests/test_plan_refsols/suppliers_bal_diffs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name), ('region_name', r_name), ('acctbal_delta', s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last]))], orderings=[(s_acctbal - PREV(args=[s_acctbal], partition=[n_regionkey], order=[(s_acctbal):asc_last])):desc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name, 's_acctbal': t1.s_acctbal, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 2e3d7779c..00dcdb1fe 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,13 +1,13 @@ ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_device_id': t0.in_device_id}) + JOIN(condition=t0.in_error_id == t1.er_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'in_device_id': t0.in_device_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) FILTER(condition=er_name == 'Battery Failure':string, columns={'er_id': er_id}) SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) diff --git a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt index 58825c242..0a986eb55 100644 --- a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt +++ b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', co_name), ('n_other_countries', n_other_countries)], orderings=[(co_name):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) SCAN(table=main.COUNTRIES, 
columns={'co_name': co_name}) AGGREGATE(keys={}, aggregations={'n_other_countries': COUNT()}) SCAN(table=main.COUNTRIES, columns={}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 9db93bb71..f81eb8d36 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,12 +1,12 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) + JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': 
t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 398921e1f..1cf03558d 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,21 +1,21 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'agg_14': t0.agg_14, 'agg_3': 
t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, 
columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) - JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': 
COUNT()}) diff --git a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt index f8470f2bb..b66d05d00 100644 --- a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt +++ b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt @@ -1,10 +1,10 @@ ROOT(columns=[('error', er_name), ('pct', ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[(ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)):desc_last]) - JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'in_error_id': t0.in_error_id}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'in_error_id': t0.in_error_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': 
pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index b1510489f..b2c4d2f9a 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,10 +1,10 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_incidents, 0:numeric)}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + 
JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) diff --git a/tests/test_plan_refsols/technograph_global_incident_rate.txt b/tests/test_plan_refsols/technograph_global_incident_rate.txt index a6d1dd70c..5d6365fb4 100644 --- a/tests/test_plan_refsols/technograph_global_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_global_incident_rate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('ir', ROUND(n_rows / agg_1, 2:numeric))], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_hot_purchase_window.txt b/tests/test_plan_refsols/technograph_hot_purchase_window.txt index 45b2922d1..f325373f3 100644 --- a/tests/test_plan_refsols/technograph_hot_purchase_window.txt +++ b/tests/test_plan_refsols/technograph_hot_purchase_window.txt @@ -1,7 +1,7 @@ ROOT(columns=[('start_of_period', ca_dt), ('n_purchases', n_purchases)], orderings=[(n_purchases):desc_last, (ca_dt):asc_first], limit=1:numeric) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_purchases': COUNT()}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - 
JOIN(condition=t1.ca_dt < DATETIME(t0.ca_dt, '+5 days':string) & t1.ca_dt >= t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t1.ca_dt < DATETIME(t0.ca_dt, '+5 days':string) & t1.ca_dt >= t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt index 052d1893e..582f162a3 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_by_release_year.txt @@ -1,13 +1,13 @@ ROOT(columns=[('year', release_year), ('ir', ROUND(DEFAULT_TO(n_rows, 0:numeric) / sum_n_rows, 2:numeric))], orderings=[(release_year):asc_first]) - JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.release_year == t1.release_year, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'release_year': t0.release_year, 'sum_n_rows': t0.sum_n_rows}) AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t0.n_rows, 'pr_release': t1.pr_release}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) AGGREGATE(keys={'release_year': YEAR(pr_release)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, columns={'pr_release': t0.pr_release}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) + JOIN(condition=t0.de_id == t1.in_device_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'pr_release': t0.pr_release}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'pr_release': t1.pr_release}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_release': pr_release}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index 8cd9a28ea..7af10fc70 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,7 +1,7 @@ ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) AGGREGATE(keys={'pr_brand': pr_brand}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'pr_brand': 
t0.pr_brand}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 701f153d4..80a9a18fe 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -1,13 +1,13 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0':string))), ('ir', ROUND(1000000.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(sum_expr_3, 0:numeric), 2:numeric))], orderings=[(month):asc_first]) AGGREGATE(keys={'month': MONTH(ca_dt), 'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) - JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) @@ -15,12 +15,12 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year, LPAD(month, 2:numeric, '0 FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, 
cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) - JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'de_id': t0.de_id}) + JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=co_name == 'CN':string, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 2bd09c500..7abb7f5b8 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,8 +1,8 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 
2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index d7cadbc00..601a5a746 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -1,24 +1,24 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], 
partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_4, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_n_rows, 0:numeric)), ('incidents', DEFAULT_TO(sum_expr_4, 0:numeric))], orderings=[(year - YEAR(release_date)):asc_first]) - JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) + JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'year_1': YEAR(ca_dt)}, aggregations={'sum_expr_4': SUM(expr_4), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 
'expr_4': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of 
day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 2f06f93d8..b1f19bec2 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -1,14 +1,14 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_3, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_3, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_expr_3, 0:numeric)), ('incidents', 
DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(year):asc_first]) FILTER(condition=DEFAULT_TO(sum_expr_3, 0:numeric) > 0:numeric, columns={'sum_expr_3': sum_expr_3, 'sum_n_rows': sum_n_rows, 'year': year}) AGGREGATE(keys={'year': YEAR(ca_dt)}, aggregations={'sum_expr_3': SUM(expr_3), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'expr_3': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) 
SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index b24f78d73..716f03473 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt index 7d8c385c4..5983915cd 100644 --- a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt +++ b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + 
JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_customers_by_orders.txt b/tests/test_plan_refsols/top_customers_by_orders.txt index 8d72b9471..f9ae80b9e 100644 --- a/tests/test_plan_refsols/top_customers_by_orders.txt +++ b/tests/test_plan_refsols/top_customers_by_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('customer_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_custkey):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 71b4e801c..5bd99fb20 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,9 +1,9 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), 
('C_COMMENT', c_comment)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (c_custkey):asc_first], limit=20:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=l_returnflag == 'R':string, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 802eb8aa3..a10aea7bc 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,16 +1,16 @@ ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(DEFAULT_TO(sum_expr_2, 0:numeric)):desc_last], limit=10:numeric) - JOIN(condition=DEFAULT_TO(t1.sum_expr_2, 0:numeric) > DEFAULT_TO(t0.sum_metric, 0:numeric) * 0.0001:numeric, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_2, 0:numeric) > DEFAULT_TO(t0.sum_metric, 0:numeric) * 0.0001:numeric, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_expr_2': t1.sum_expr_2}) AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': 
ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr_2': SUM(ps_supplycost * ps_availqty)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q12.txt b/tests/test_plan_refsols/tpch_q12.txt index e12415ca9..1d3fec99b 100644 --- a/tests/test_plan_refsols/tpch_q12.txt +++ b/tests/test_plan_refsols/tpch_q12.txt @@ -1,6 
+1,6 @@ ROOT(columns=[('L_SHIPMODE', l_shipmode), ('HIGH_LINE_COUNT', DEFAULT_TO(sum_is_high_priority, 0:numeric)), ('LOW_LINE_COUNT', DEFAULT_TO(sum_expr_2, 0:numeric))], orderings=[(l_shipmode):asc_first]) AGGREGATE(keys={'l_shipmode': l_shipmode}, aggregations={'sum_expr_2': SUM(NOT(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))), 'sum_is_high_priority': SUM(ISIN(o_orderpriority, ['1-URGENT', '2-HIGH']:array[unknown]))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_shipmode': t0.l_shipmode, 'o_orderpriority': t1.o_orderpriority}) FILTER(condition=l_commitdate < l_receiptdate & l_shipdate < l_commitdate & YEAR(l_receiptdate) == 1994:numeric & l_shipmode == 'MAIL':string | l_shipmode == 'SHIP':string, columns={'l_orderkey': l_orderkey, 'l_shipmode': l_shipmode}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipdate': l_shipdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index ecf4d93a1..2646b1149 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) AGGREGATE(keys={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}, aggregations={'CUSTDIST': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q14.txt b/tests/test_plan_refsols/tpch_q14.txt index ea3385773..c4c8c588b 100644 --- a/tests/test_plan_refsols/tpch_q14.txt +++ b/tests/test_plan_refsols/tpch_q14.txt @@ -1,6 +1,6 @@ ROOT(columns=[('PROMO_REVENUE', 100.0:numeric * DEFAULT_TO(sum_promo_value, 0:numeric) / DEFAULT_TO(sum_value, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_promo_value': SUM(IFF(STARTSWITH(p_type, 'PROMO':string), l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'p_type': t1.p_type}) FILTER(condition=MONTH(l_shipdate) == 9:numeric & YEAR(l_shipdate) == 1995:numeric, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index b42afa376..b7aa4880a 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,8 +1,8 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', 
s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) - JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'max_revenue': MAX(DEFAULT_TO(sum_expr_2, 0:numeric))}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(l_extendedprice * 1:numeric - l_discount)}) FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) diff --git 
a/tests/test_plan_refsols/tpch_q16.txt b/tests/test_plan_refsols/tpch_q16.txt index 07c0cfd97..8d7d5de72 100644 --- a/tests/test_plan_refsols/tpch_q16.txt +++ b/tests/test_plan_refsols/tpch_q16.txt @@ -1,7 +1,7 @@ ROOT(columns=[('P_BRAND', p_brand), ('P_TYPE', p_type), ('P_SIZE', p_size), ('SUPPLIER_COUNT', SUPPLIER_COUNT)], orderings=[(SUPPLIER_COUNT):desc_last, (p_brand):asc_first, (p_type):asc_first, (p_size):asc_first], limit=10:numeric) AGGREGATE(keys={'p_brand': p_brand, 'p_size': p_size, 'p_type': p_type}, aggregations={'SUPPLIER_COUNT': NDISTINCT(ps_suppkey)}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_brand': t1.p_brand, 'p_size': t1.p_size, 'p_type': t1.p_type, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=NOT(LIKE(s_comment, '%Customer%Complaints%':string)), columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_comment': s_comment, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q17.txt b/tests/test_plan_refsols/tpch_q17.txt index 25ea91c57..955986183 100644 --- a/tests/test_plan_refsols/tpch_q17.txt +++ b/tests/test_plan_refsols/tpch_q17.txt @@ -1,7 +1,7 @@ ROOT(columns=[('AVG_YEARLY', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / 7.0:numeric)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) FILTER(condition=l_quantity < 0.2:numeric * RELAVG(args=[l_quantity], partition=[l_partkey], order=[]), columns={'l_extendedprice': l_extendedprice}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity}) FILTER(condition=p_brand == 'Brand#23':string & p_container == 'MED BOX':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 8acc50868..9a3b55e20 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), ('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': 
t0.o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=DEFAULT_TO(sum_l_quantity, 0:numeric) > 300:numeric, columns={'l_orderkey': l_orderkey, 'sum_l_quantity': sum_l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index 5f7aaab71..c3a94e9b4 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,6 +1,6 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG 
PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index 135018c23..e097c9898 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,13 +1,13 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', 
s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 'p_size': p_size, 'p_type': p_type}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_acctbal': t0.s_acctbal, 's_address': t0.s_address, 's_comment': t0.s_comment, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_nationkey': s_nationkey, 's_phone': s_phone, 's_suppkey': s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index 240330953..a09dd5caf 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,13 +1,13 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) - JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, 
aggregations={'sum_l_quantity': SUM(l_quantity)}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 8a3ad16ad..05eafcbc8 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,21 +1,21 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 
'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) - JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) + JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - 
JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 64cf83a90..dbca0c73d 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,8 +1,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey 
== t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) - JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index fa8154ac2..0a7c9f97d 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ b/tests/test_plan_refsols/tpch_q3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 
'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/tpch_q4.txt b/tests/test_plan_refsols/tpch_q4.txt index 633e0d21e..c2fe0f9a6 100644 --- a/tests/test_plan_refsols/tpch_q4.txt +++ b/tests/test_plan_refsols/tpch_q4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('O_ORDERPRIORITY', o_orderpriority), ('ORDER_COUNT', ORDER_COUNT)], orderings=[(o_orderpriority):asc_first]) AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={'ORDER_COUNT': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, cardinality=SINGULAR_FILTER, columns={'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderpriority': t0.o_orderpriority}) FILTER(condition=QUARTER(o_orderdate) == 3:numeric & 
YEAR(o_orderdate) == 1993:numeric, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_commitdate < l_receiptdate, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 4ecd8c14c..6e4183624 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,10 +1,10 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) @@ -12,6 +12,6 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:n FILTER(condition=o_orderdate < datetime.date(1995, 1, 1):datetime & o_orderdate >= datetime.date(1994, 1, 1):datetime, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + 
JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index 14542215b..813d42b01 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,14 +1,14 @@ ROOT(columns=[('SUPP_NATION', supp_nation), ('CUST_NATION', n_name), ('L_YEAR', l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(supp_nation):asc_first, (n_name):asc_first, (l_year):asc_first]) AGGREGATE(keys={'l_year': YEAR(l_shipdate), 'n_name': n_name, 'supp_nation': supp_nation}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) + 
JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index e19874147..f27de3c5c 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -1,20 +1,20 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0:numeric) / DEFAULT_TO(sum_volume, 0:numeric))], orderings=[]) AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 
'AMERICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index b82a25527..d032c5bc1 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -1,13 +1,13 @@ ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(n_name):asc_first, (o_year):desc_last], limit=10:numeric) AGGREGATE(keys={'n_name': n_name, 'o_year': YEAR(o_orderdate)}, aggregations={'sum_value': SUM(l_extendedprice * 1:numeric - l_discount - ps_supplycost * l_quantity)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': 
t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) FILTER(condition=CONTAINS(p_name, 'green':string), 
columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index 3d481e5fb..067b2808d 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -2,24 +2,24 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[( AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / sum_n_instances)}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': SUM(n_instances)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) AGGREGATE(keys={'o_custkey': o_custkey, 'p_type': p_type, 'r_name': r_name}, aggregations={'n_instances': COUNT()}) 
- JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_regionkey == 
t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index 49fed6fa7..c98d2d967 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ 
b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal_1': COUNT(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'min_c_acctbal_1': MIN(c_acctbal), 'n_rows_1': COUNT(), 'sum_c_acctbal_1': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index af0c8e206..3e326b484 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index 28100ecde..dad30206d 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, columns={'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 
af0c8e206..3e326b484 100644 --- a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index af0c8e206..3e326b484 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_4.txt b/tests/test_plan_refsols/window_filter_order_4.txt index 000297593..f4240ff57 100644 --- a/tests/test_plan_refsols/window_filter_order_4.txt +++ b/tests/test_plan_refsols/window_filter_order_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_5.txt b/tests/test_plan_refsols/window_filter_order_5.txt index 71a43eb3f..3aff8fad4 100644 --- 
a/tests/test_plan_refsols/window_filter_order_5.txt +++ b/tests/test_plan_refsols/window_filter_order_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELAVG(args=[DEFAULT_TO(c_acctbal, 0:numeric)], partition=[], order=[]) & PRESENT(expr_0), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'expr_0': 1:numeric}) diff --git a/tests/test_plan_refsols/window_filter_order_6.txt b/tests/test_plan_refsols/window_filter_order_6.txt index 71a43eb3f..3aff8fad4 100644 --- a/tests/test_plan_refsols/window_filter_order_6.txt +++ b/tests/test_plan_refsols/window_filter_order_6.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELAVG(args=[DEFAULT_TO(c_acctbal, 0:numeric)], partition=[], order=[]) & PRESENT(expr_0), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal, 'expr_0': t1.expr_0}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'expr_0': 
1:numeric}) diff --git a/tests/test_plan_refsols/window_filter_order_7.txt b/tests/test_plan_refsols/window_filter_order_7.txt index db0c030a8..4d41e7023 100644 --- a/tests/test_plan_refsols/window_filter_order_7.txt +++ b/tests/test_plan_refsols/window_filter_order_7.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELAVG(args=[c_acctbal], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t1.c_acctbal}) FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 8a6c6d52e..30462c7cd 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & ABSENT(n_rows), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': 
t1.n_rows}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 0a1837981..642f47f0a 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[total_spent], partition=[], order=[]) & ABSENT(expr_0), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'expr_0': t1.expr_0, 'o_totalprice': t0.o_totalprice, 'total_spent': t1.total_spent}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) PROJECT(columns={'c_custkey': c_custkey, 'expr_0': 1:numeric, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index ddf6fd292..c3c89419b 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,4 +1,4 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w2', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w3', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w6', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w7', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))), ('w8', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index 5b0a9e60f..dbbba0f08 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,4 +1,4 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w2', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w3', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w6', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w7', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))), ('w8', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': 
t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/year_month_nation_orders.txt b/tests/test_plan_refsols/year_month_nation_orders.txt index b91e4cd3c..8af9a411d 100644 --- a/tests/test_plan_refsols/year_month_nation_orders.txt +++ b/tests/test_plan_refsols/year_month_nation_orders.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', n_name), ('order_year', order_year), ('order_month', order_month), ('n_orders', n_orders)], orderings=[(n_orders):desc_last], limit=5:numeric) AGGREGATE(keys={'n_name': n_name, 'order_month': MONTH(o_orderdate), 'order_year': YEAR(o_orderdate)}, aggregations={'n_orders': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey}) FILTER(condition=ISIN(r_name, ['ASIA', 'AFRICA']:array[unknown]), columns={'r_regionkey': 
r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) From 65697e89cfd9f1652566390c493ad2b1a3004829 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 25 Aug 2025 14:23:59 -0400 Subject: [PATCH 88/97] Added reverse cardinality based column pruning [RUN CI] [RUN MYSQL] --- pydough/conversion/filter_pushdown.py | 16 +++-- .../relational_nodes/column_pruner.py | 67 +++++++++++++------ .../access_partition_child_after_filter.txt | 6 +- ..._partition_child_filter_backref_filter.txt | 2 +- .../agg_parts_by_type_backref_global.txt | 2 +- .../aggregation_analytics_1.txt | 4 +- .../aggregation_analytics_2.txt | 2 +- .../aggregation_analytics_3.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 4 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 4 +- tests/test_plan_refsols/common_prefix_ag.txt | 4 +- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 2 +- tests/test_plan_refsols/common_prefix_aj.txt | 4 +- tests/test_plan_refsols/common_prefix_ak.txt | 4 +- tests/test_plan_refsols/common_prefix_al.txt | 4 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_an.txt | 4 +- tests/test_plan_refsols/common_prefix_ao.txt | 2 +- tests/test_plan_refsols/common_prefix_l.txt | 2 +- tests/test_plan_refsols/common_prefix_s.txt | 2 +- tests/test_plan_refsols/common_prefix_t.txt | 2 +- tests/test_plan_refsols/common_prefix_u.txt | 2 +- tests/test_plan_refsols/correl_11.txt | 2 +- tests/test_plan_refsols/correl_12.txt | 2 +- tests/test_plan_refsols/correl_17.txt | 2 +- tests/test_plan_refsols/correl_18.txt | 2 +- tests/test_plan_refsols/correl_19.txt | 4 +- tests/test_plan_refsols/correl_21.txt | 2 +- tests/test_plan_refsols/correl_22.txt | 2 +- tests/test_plan_refsols/correl_23.txt | 2 +- 
tests/test_plan_refsols/correl_24.txt | 2 +- tests/test_plan_refsols/correl_25.txt | 2 +- tests/test_plan_refsols/correl_27.txt | 4 +- tests/test_plan_refsols/correl_28.txt | 4 +- tests/test_plan_refsols/correl_29.txt | 8 +-- tests/test_plan_refsols/correl_3.txt | 2 +- tests/test_plan_refsols/correl_34.txt | 2 +- tests/test_plan_refsols/correl_36.txt | 56 +++++++--------- tests/test_plan_refsols/correl_4.txt | 2 +- tests/test_plan_refsols/correl_5.txt | 4 +- tests/test_plan_refsols/double_cross.txt | 4 +- .../epoch_culture_events_info.txt | 10 +-- .../epoch_events_per_season.txt | 2 +- .../epoch_intra_season_searches.txt | 6 +- .../epoch_num_predawn_cold_war.txt | 4 +- ...ping_event_search_other_users_per_user.txt | 2 +- .../epoch_pct_searches_per_tod.txt | 2 +- .../epoch_search_results_by_tod.txt | 2 +- .../test_plan_refsols/join_region_nations.txt | 4 +- .../join_region_nations_customers.txt | 6 +- .../lineitem_regional_shipments2.txt | 2 +- .../lines_german_supplier_economy_part.txt | 2 +- .../month_year_sliding_windows.txt | 14 ++-- .../mostly_positive_accounts_per_nation1.txt | 2 +- .../mostly_positive_accounts_per_nation2.txt | 2 +- .../mostly_positive_accounts_per_nation3.txt | 2 +- .../multi_partition_access_2.txt | 2 +- .../multi_partition_access_3.txt | 19 ++---- .../multi_partition_access_4.txt | 2 +- .../multi_partition_access_5.txt | 14 ++-- .../multi_partition_access_6.txt | 48 +++---------- .../nation_name_contains_region_name.txt | 2 +- .../odate_and_rdate_avggap.txt | 2 +- .../order_by_before_join.txt | 4 +- tests/test_plan_refsols/pagerank_a1.txt | 2 +- tests/test_plan_refsols/pagerank_a2.txt | 2 +- tests/test_plan_refsols/pagerank_a6.txt | 2 +- tests/test_plan_refsols/pagerank_b3.txt | 2 +- tests/test_plan_refsols/pagerank_c4.txt | 2 +- tests/test_plan_refsols/pagerank_d5.txt | 2 +- tests/test_plan_refsols/pagerank_h8.txt | 2 +- tests/test_plan_refsols/part_cross_part_a.txt | 2 +- tests/test_plan_refsols/part_cross_part_b.txt | 4 +- 
tests/test_plan_refsols/part_cross_part_c.txt | 4 +- .../percentile_customers_per_region.txt | 4 +- .../quantile_function_test_1.txt | 6 +- .../rank_customers_per_region.txt | 4 +- .../rank_nations_per_region_by_customers.txt | 4 +- .../region_nation_window_aggs.txt | 6 +- .../regional_suppliers_percentile.txt | 4 +- tests/test_plan_refsols/simple_cross_10.txt | 4 +- tests/test_plan_refsols/simple_cross_11.txt | 2 +- tests/test_plan_refsols/simple_cross_2.txt | 2 +- tests/test_plan_refsols/simple_cross_4.txt | 2 +- tests/test_plan_refsols/simple_cross_6.txt | 2 +- tests/test_plan_refsols/simple_cross_7.txt | 2 +- tests/test_plan_refsols/simple_cross_8.txt | 4 +- tests/test_plan_refsols/simple_cross_9.txt | 4 +- ...chnograph_country_combination_analysis.txt | 2 +- ...umulative_incident_rate_goldcopperstar.txt | 6 +- ..._year_cumulative_incident_rate_overall.txt | 4 +- tests/test_plan_refsols/tpch_q19.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- tests/test_plan_refsols/tpch_q22.txt | 4 +- tests/test_plan_refsols/tpch_q3.txt | 2 +- tests/test_plan_refsols/tpch_q7.txt | 4 +- tests/test_plan_refsols/tpch_q8.txt | 4 +- .../window_sliding_frame_relsize.txt | 4 +- .../window_sliding_frame_relsum.txt | 4 +- tests/test_pydough_from_string.py | 1 + tests/test_sql_refsols/correl_36_sqlite.sql | 23 ++----- .../test_sql_refsols/quantile_test_1_ansi.sql | 9 ++- .../quantile_test_1_mysql.sql | 11 ++- .../quantile_test_1_sqlite.sql | 11 ++- .../window_functions_ansi.sql | 4 +- .../window_functions_mysql.sql | 4 +- .../window_functions_sqlite.sql | 4 +- .../window_sliding_frame_relsize_ansi.sql | 24 +++---- .../window_sliding_frame_relsize_mysql.sql | 24 +++---- .../window_sliding_frame_relsize_sqlite.sql | 24 +++---- .../window_sliding_frame_relsum_ansi.sql | 24 +++---- .../window_sliding_frame_relsum_mysql.sql | 24 +++---- .../window_sliding_frame_relsum_sqlite.sql | 24 +++---- 115 files changed, 324 insertions(+), 413 deletions(-) diff --git 
a/pydough/conversion/filter_pushdown.py b/pydough/conversion/filter_pushdown.py index c08be33d2..e247bd132 100644 --- a/pydough/conversion/filter_pushdown.py +++ b/pydough/conversion/filter_pushdown.py @@ -185,6 +185,7 @@ def visit_join(self, join: Join) -> RelationalNode: # The join type, cardinality, and inputs for the output join node. join_type: JoinType = join.join_type cardinality: JoinCardinality = join.cardinality + reverse_cardinality: JoinCardinality = join.reverse_cardinality new_inputs: list[RelationalNode] = [] # If the join type is LEFT or SEMI but the condition is TRUE, convert it @@ -239,10 +240,15 @@ def visit_join(self, join: Join) -> RelationalNode: remaining_filters, lambda expr: only_references_columns(expr, input_cols[idx]), ) - # Ensure that if any filter is pushed into an input (besides - # the first input) that the join is marked as filtering. - if len(pushable_filters) > 0 and idx > 0: - cardinality = join.cardinality.add_filter() + # Ensure that if any filter is pushed into an input, the + # corresponding join cardinality is updated to reflect that a filter + # has been applied. 
+ if len(pushable_filters) > 0: + if idx == 1: + cardinality = join.cardinality.add_filter() + else: + reverse_cardinality = reverse_cardinality.add_filter() + # Do the same pushable_filters = { expr.accept_shuttle(transposer) for expr in pushable_filters } @@ -270,6 +276,7 @@ def visit_join(self, join: Join) -> RelationalNode: else: new_conjunction.add(join._condition) cardinality = join.cardinality.add_filter() + reverse_cardinality = join.reverse_cardinality.add_filter() join._condition = RelationalExpression.form_conjunction( sorted(new_conjunction, key=repr) ) @@ -280,6 +287,7 @@ def visit_join(self, join: Join) -> RelationalNode: new_node = join.copy(inputs=new_inputs) assert isinstance(new_node, Join) new_node.cardinality = cardinality + new_node.reverse_cardinality = reverse_cardinality new_node.join_type = join_type return build_filter(new_node, remaining_filters) diff --git a/pydough/relational/relational_nodes/column_pruner.py b/pydough/relational/relational_nodes/column_pruner.py index 1d4dc28c7..a49f05b31 100644 --- a/pydough/relational/relational_nodes/column_pruner.py +++ b/pydough/relational/relational_nodes/column_pruner.py @@ -150,31 +150,56 @@ def _prune_node_columns( # Special case: replace LEFT join where RHS is unused with LHS (only # possible if the join is used to bring 1:1 data into the rows of the # LHS, which is unecessary if no data is being brought). Also do the - # same for inner joins that meet certain criteria. - if isinstance(output, Join) and ( - (output.join_type == JoinType.LEFT) - or ( + # same for inner joins that meet certain criteria. Do the same with + # inner joins where the left side is unused and the data is singular + # and non-filtering with regards to the right side. 
+ if isinstance(output, Join): + prune_left: bool = ( + output.join_type == JoinType.INNER + and output.reverse_cardinality == JoinCardinality.SINGULAR_ACCESS + ) + prune_right: bool = (output.join_type == JoinType.LEFT) or ( output.join_type == JoinType.INNER and output.cardinality == JoinCardinality.SINGULAR_ACCESS ) - ): - uses_rhs: bool = False - for column in output.columns.values(): - if ( - isinstance(column, ColumnReference) - and column.input_name == output.default_input_aliases[1] - ): - uses_rhs = True - break - if not uses_rhs: + if prune_left or prune_right: + uses_lhs: bool = False + uses_rhs: bool = False + for column in output.columns.values(): + if ( + isinstance(column, ColumnReference) + and column.input_name == output.default_input_aliases[0] + ): + uses_lhs = True + if ( + isinstance(column, ColumnReference) + and column.input_name == output.default_input_aliases[1] + ): + uses_rhs = True + if uses_lhs and uses_rhs: + break + new_columns: dict[str, RelationalExpression] = {} - for column_name, column_val in output.columns.items(): - assert isinstance(column_val, ColumnReference) - new_columns[column_name] = output.inputs[0].columns[column_val.name] - if isinstance(output.inputs[0], Aggregate): - for key in output.inputs[0].keys: - new_columns[key] = output.inputs[0].keys[key] - output = output.inputs[0].copy(columns=new_columns) + if prune_right and not uses_rhs: + for column_name, column_val in output.columns.items(): + assert isinstance(column_val, ColumnReference) + new_columns[column_name] = output.inputs[0].columns[ + column_val.name + ] + if isinstance(output.inputs[0], Aggregate): + for key in output.inputs[0].keys: + new_columns[key] = output.inputs[0].keys[key] + output = output.inputs[0].copy(columns=new_columns) + elif prune_left and not uses_lhs: + for column_name, column_val in output.columns.items(): + assert isinstance(column_val, ColumnReference) + new_columns[column_name] = output.inputs[1].columns[ + column_val.name + ] + if 
isinstance(output.inputs[1], Aggregate): + for key in output.inputs[1].keys: + new_columns[key] = output.inputs[1].keys[key] + output = output.inputs[1].copy(columns=new_columns) return output, correl_refs diff --git a/tests/test_plan_refsols/access_partition_child_after_filter.txt b/tests/test_plan_refsols/access_partition_child_after_filter.txt index 76c2d9652..e0889ae05 100644 --- a/tests/test_plan_refsols/access_partition_child_after_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_after_filter.txt @@ -1,6 +1,2 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) - FILTER(condition=avg_p_retailprice > 27.5:numeric, columns={'p_type': p_type}) - AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) - SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt index 9527b7c1a..fd9934912 100644 --- a/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_filter_backref_filter.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': 
t1.p_retailprice, 'p_type': t1.p_type}) + JOIN(condition=t1.p_retailprice < t0.avg_price & t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt index 3a3ff6bd6..96764d11e 100644 --- a/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt +++ b/tests/test_plan_refsols/agg_parts_by_type_backref_global.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_type', p_type), ('percentage_of_parts', n_rows / total_num_parts), ('avg_price', avg_p_retailprice)], orderings=[]) - JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) + JOIN(condition=t1.avg_p_retailprice >= t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 'p_type': t1.p_type, 'total_num_parts': t0.total_num_parts}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice), 'total_num_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/aggregation_analytics_1.txt b/tests/test_plan_refsols/aggregation_analytics_1.txt index 58e0deb0c..5313d48d0 100644 --- 
a/tests/test_plan_refsols/aggregation_analytics_1.txt +++ b/tests/test_plan_refsols/aggregation_analytics_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=8:numeric) JOIN(condition=t0.ps_partkey == t1.ps_partkey & t0.ps_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t0.p_name, 'sum_revenue': t1.sum_revenue}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) @@ -9,7 +9,7 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 
'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000009450':string, columns={'s_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index cb721fe2a..2ef53d9c2 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -2,7 +2,7 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_ JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 8d3788bf3..339aaf5d5 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -2,7 +2,7 @@ ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_reve JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) FILTER(condition=s_name == 'Supplier#000000182':string, columns={'s_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index a0a95a823..452929916 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -2,8 +2,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index a0a95a823..452929916 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -2,8 +2,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index da2b4c0c0..edcdf502e 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,8 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index a3e99e82a..0ac56433a 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', 
n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ 
FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index 1e859202f..8fa8c59a8 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_high_orders', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 
0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 4448ee711..38b744b45 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ 
-1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index ca09a6735..d610b63d5 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', 
anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_revenue': t1.sum_sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 
'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index ab4e235dd..733673099 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n_machine_high_domestic_lines', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], 
orderings=[(anything_n_name):asc_first]) FILTER(condition=sum_n_rows > 0:numeric & sum_sum_n_rows > 0:numeric, columns={'anything_n_name': anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_machine_cust', n_rows), ('n_ FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == 
t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey & t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index f7b11e162..f674f8512 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -4,9 +4,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index 1ed0ccd60..eb6b60f1f 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -3,9 +3,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numer LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=sum_agg_3 > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index 396066d64..cfab97d0b 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -3,9 +3,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 
91bfe0dcf..508b413ff 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -4,7 +4,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 166af0404..c03ee50fe 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 85f114b4e..2dbcd5430 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', n_rows), ('most_recent_order_distinct', ndistinct_l_suppkey)], orderings=[(c_name):asc_first]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 4592b0af2..873834947 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': 
c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 8bb6e9ff8..1a4c5e05e 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/correl_11.txt b/tests/test_plan_refsols/correl_11.txt index b2cdc198b..03eea0352 100644 --- a/tests/test_plan_refsols/correl_11.txt +++ b/tests/test_plan_refsols/correl_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t0.p_brand == 
t1.p_brand & t1.p_retailprice > 1.4:numeric * t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) + JOIN(condition=t0.p_brand == t1.p_brand & t1.p_retailprice > 1.4:numeric * t0.avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_brand': t0.p_brand}) AGGREGATE(keys={'p_brand': p_brand}, aggregations={'avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_12.txt b/tests/test_plan_refsols/correl_12.txt index 3fceabdc2..dcf0d68f6 100644 --- a/tests/test_plan_refsols/correl_12.txt +++ b/tests/test_plan_refsols/correl_12.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', p_brand)], orderings=[(p_brand):asc_first]) AGGREGATE(keys={'p_brand': p_brand}, aggregations={}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_brand': t0.p_brand}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price & t0.p_brand == t1.p_brand & t1.p_retailprice > t0.brand_avg_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_brand': t0.p_brand}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'brand_avg_price': t1.brand_avg_price, 'global_avg_price': t0.global_avg_price, 'p_brand': t1.p_brand}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_17.txt b/tests/test_plan_refsols/correl_17.txt index 7c1c086c4..a62a84c23 100644 --- a/tests/test_plan_refsols/correl_17.txt +++ 
b/tests/test_plan_refsols/correl_17.txt @@ -1,4 +1,4 @@ ROOT(columns=[('fullname', JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name)))], orderings=[(JOIN_STRINGS('-':string, LOWER(r_name), LOWER(n_name))):asc_first]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_18.txt b/tests/test_plan_refsols/correl_18.txt index 75910fb98..ff21980c3 100644 --- a/tests/test_plan_refsols/correl_18.txt +++ b/tests/test_plan_refsols/correl_18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', sum_n_above_avg)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_n_above_avg': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_totalprice >= 0.5:numeric * DEFAULT_TO(t0.sum_o_totalprice, 0:numeric), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=n_rows > 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}, aggregations={'n_rows': COUNT(), 'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) == 1993:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': 
o_totalprice}) diff --git a/tests/test_plan_refsols/correl_19.txt b/tests/test_plan_refsols/correl_19.txt index 453727b40..99566c0f2 100644 --- a/tests/test_plan_refsols/correl_19.txt +++ b/tests/test_plan_refsols/correl_19.txt @@ -1,7 +1,7 @@ ROOT(columns=[('supplier_name', supplier_name), ('n_super_cust', n_super_cust)], orderings=[(n_super_cust):desc_last], limit=5:numeric) AGGREGATE(keys={'s_suppkey': s_suppkey}, aggregations={'n_super_cust': COUNT(), 'supplier_name': ANYTHING(s_name)}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_acctbal': t0.s_acctbal, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/correl_21.txt b/tests/test_plan_refsols/correl_21.txt index 8a6cdde8d..2173cadb5 100644 --- a/tests/test_plan_refsols/correl_21.txt +++ b/tests/test_plan_refsols/correl_21.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'n_sizes': COUNT()}) - JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_parts, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_parts': AVG(n_parts)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_parts': COUNT()}) SCAN(table=tpch.PART, columns={'p_size': p_size}) diff --git a/tests/test_plan_refsols/correl_22.txt b/tests/test_plan_refsols/correl_22.txt index aad7e12c4..2d7675c4e 100644 --- a/tests/test_plan_refsols/correl_22.txt +++ b/tests/test_plan_refsols/correl_22.txt @@ -1,6 +1,6 @@ ROOT(columns=[('container', p_container), ('n_types', n_types)], orderings=[(n_types):desc_last, (p_container):asc_first], limit=5:numeric) AGGREGATE(keys={'p_container': p_container}, aggregations={'n_types': COUNT()}) - JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_container': t1.p_container}) + JOIN(condition=t1.avg_p_retailprice > t0.global_avg_price, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_container': t1.p_container}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) AGGREGATE(keys={'p_container': p_container, 'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_23.txt b/tests/test_plan_refsols/correl_23.txt index 8622ac925..1862339e7 100644 --- a/tests/test_plan_refsols/correl_23.txt +++ b/tests/test_plan_refsols/correl_23.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_sizes', n_sizes)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_sizes': COUNT()}) - JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={}) + JOIN(condition=t1.n_rows > t0.avg_n_combo, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) AGGREGATE(keys={}, aggregations={'avg_n_combo': AVG(n_combos)}) AGGREGATE(keys={'p_size': p_size}, aggregations={'n_combos': COUNT()}) AGGREGATE(keys={'p_container': p_container, 'p_size': p_size, 'p_type': p_type}, aggregations={}) diff --git a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index d5b649cbd..f93b3feb8 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year), ('month', month), ('n_orders_in_range', n_orders_in_range)], orderings=[(year):asc_first, (month):asc_first]) AGGREGATE(keys={'month': month, 'year': year}, aggregations={'n_orders_in_range': COUNT()}) - JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'year': t0.year}) + JOIN(condition=t0.month == MONTH(t1.o_orderdate) & t0.year == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'year': t0.year}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month': month, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year):asc_last, (month):asc_last]), 'year': year}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': 
o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_25.txt b/tests/test_plan_refsols/correl_25.txt index e71563e9c..0c7b3fa53 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -2,7 +2,7 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regio AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name), 'n_urgent_semi_domestic_rail_orders': NDISTINCT(l_orderkey)}) JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey 
== t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 4e8bf9fda..05490b4f1 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -3,8 +3,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 
'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index a4726946f..862466ad1 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -3,8 +3,8 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index fac5389cf..78b655da0 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,9 +1,9 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 
'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) @@ -12,7 +12,7 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_acctbal': MAX(c_acctbal), 'min_c_acctbal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) diff --git a/tests/test_plan_refsols/correl_3.txt b/tests/test_plan_refsols/correl_3.txt index efeaf72d2..475b9a565 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -4,7 +4,7 @@ ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeri AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) 
FILTER(condition=r_regionkey == anything_n_regionkey, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index e10964b38..eba9a0a50 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={}) FILTER(condition=l_orderkey == o_orderkey & ps_partkey == l_partkey & ps_suppkey == l_suppkey & o_totalprice > RELAVG(args=[o_totalprice], partition=[l_linenumber, l_orderkey, ps_partkey, ps_suppkey], order=[]) | RELSIZE(args=[], 
partition=[l_partkey, l_suppkey], order=[]) == 1:numeric, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_36.txt b/tests/test_plan_refsols/correl_36.txt index 
bb518918c..6e6606de9 100644 --- a/tests/test_plan_refsols/correl_36.txt +++ b/tests/test_plan_refsols/correl_36.txt @@ -1,33 +1,27 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.key_12, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey}) - FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}) - SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'key_12': key_12, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}, aggregations={}) - JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) - JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': 
t0.p_type}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) - FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.SUPPLIER, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={'key_12': key_12, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey}, aggregations={}) + JOIN(condition=t0.p_type == t1.p_type & t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'key_12': t0.key_12, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_12': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) + 
FILTER(condition=YEAR(l_shipdate) == 1998:numeric, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git 
a/tests/test_plan_refsols/correl_4.txt b/tests/test_plan_refsols/correl_4.txt index f8778c6d6..8f72568f4 100644 --- a/tests/test_plan_refsols/correl_4.txt +++ b/tests/test_plan_refsols/correl_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', n_name)], orderings=[(n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) diff --git a/tests/test_plan_refsols/correl_5.txt b/tests/test_plan_refsols/correl_5.txt index 1ae689fe3..8f28d21ea 100644 --- a/tests/test_plan_refsols/correl_5.txt +++ b/tests/test_plan_refsols/correl_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', name)], orderings=[(name):asc_first]) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'name': ANYTHING(r_name)}) - JOIN(condition=t1.s_acctbal <= t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) + JOIN(condition=t1.s_acctbal <= t0.smallest_bal + 4.0:numeric & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'smallest_bal': t0.smallest_bal}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey, 'smallest_bal': t0.smallest_bal}) AGGREGATE(keys={}, aggregations={'smallest_bal': MIN(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal}) diff --git a/tests/test_plan_refsols/double_cross.txt b/tests/test_plan_refsols/double_cross.txt index 0b5722202..52322ffa3 100644 --- a/tests/test_plan_refsols/double_cross.txt +++ b/tests/test_plan_refsols/double_cross.txt @@ -1,9 +1,9 @@ ROOT(columns=[('wk', ord_wk), ('n_lines', n_rows), ('n_orders', anything_n_orders), ('lpo', ROUND(RELSUM(args=[n_rows], partition=[], order=[(line_wk):asc_last], cumulative=True) / RELSUM(args=[anything_n_orders], partition=[], order=[(ord_wk):asc_last], cumulative=True), 4:numeric))], orderings=[(ord_wk):asc_first]) AGGREGATE(keys={'line_wk': DATEDIFF('week':string, min_date, l_receiptdate), 'ord_wk': ord_wk}, aggregations={'anything_n_orders': ANYTHING(n_orders), 'n_rows': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 
'ord_wk': t0.ord_wk}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_date, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_receiptdate': t1.l_receiptdate, 'min_date': t0.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'min_date': t1.min_date, 'n_orders': t0.n_orders, 'ord_wk': t0.ord_wk}) AGGREGATE(keys={'ord_wk': DATEDIFF('week':string, min_date, o_orderdate)}, aggregations={'n_orders': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=DATEDIFF('week':string, t0.min_date, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'min_date': t0.min_date, 'o_orderdate': t1.o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) FILTER(condition=o_orderpriority == '1-URGENT':string & o_orderstatus == 'F':string, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index 63f0f6776..e26cfdcde 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -1,13 +1,13 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEAR(ev_dt)), ('season_name', s_name), ('tod', t_name)], orderings=[(ev_dt):asc_first], limit=6:numeric) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 
'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) - JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_name': t0.ev_name, 's_name': t0.s_name, 't_name': t1.t_name}) + JOIN(condition=t0.ev_key == t1.ev_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'er_name': t0.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name, 's_name': t1.s_name}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'er_name': t1.er_name, 'ev_dt': t0.ev_dt, 'ev_key': t0.ev_key, 'ev_name': t0.ev_name}) FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_events_per_season.txt b/tests/test_plan_refsols/epoch_events_per_season.txt index 3e35da6bd..7220d48f4 100644 --- a/tests/test_plan_refsols/epoch_events_per_season.txt +++ b/tests/test_plan_refsols/epoch_events_per_season.txt @@ -1,5 +1,5 @@ ROOT(columns=[('season_name', s_name), ('n_events', n_events)], orderings=[(n_events):desc_last, (s_name):asc_first]) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_events': COUNT()}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git 
a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 471d3e119..18a067096 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -1,8 +1,8 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numeric * DEFAULT_TO(agg_2, 0:numeric) / agg_3, 2:numeric)), ('pct_event_searches', ROUND(100.0:numeric * DEFAULT_TO(sum_is_intra_season, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(s_name):asc_first]) JOIN(condition=t0.s_name == t1.s_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_2': t0.sum_is_intra_season, 'agg_3': t0.n_rows, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_is_intra_season': t1.sum_is_intra_season}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(n_rows, 0:numeric) > 0:numeric)}) - JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) + JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 
's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) AGGREGATE(keys={'s_name': s_name, 'search_id': search_id}, aggregations={'n_rows': COUNT()}) @@ -16,7 +16,7 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numer AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'name_9': t1.s_name, 's_name': t0.s_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index 225fc5ac2..a3a22d030 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n_events', n_events)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_events': COUNT()}) JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) - JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) + JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) - JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_key': t0.ev_key}) + JOIN(condition=t1.er_start_year <= YEAR(t0.ev_dt) & YEAR(t0.ev_dt) < t1.er_end_year, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=er_name == 'Cold War':string, columns={'er_end_year': er_end_year, 'er_start_year': er_start_year}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index 3c0b8dc3a..22b8030f5 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -3,7 +3,7 @@ ROOT(columns=[('user_name', anything_user_name), ('n_other_users', n_other_users 
JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) - JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_user_id': search_user_id}) SCAN(table=EVENTS, columns={'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt index 88b48990a..25e99085d 100644 --- a/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt +++ b/tests/test_plan_refsols/epoch_pct_searches_per_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'n_rows': COUNT()}) - 
JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'t_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) SCAN(table=SEARCHES, columns={'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_search_results_by_tod.txt b/tests/test_plan_refsols/epoch_search_results_by_tod.txt index a4f00ca05..67a223a35 100644 --- a/tests/test_plan_refsols/epoch_search_results_by_tod.txt +++ b/tests/test_plan_refsols/epoch_search_results_by_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('pct_searches', ROUND(100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[], order=[]), 2:numeric)), ('avg_results', ROUND(avg_search_num_results, 2:numeric))], orderings=[(anything_t_start_hour):asc_first]) AGGREGATE(keys={'t_name': t_name}, aggregations={'anything_t_start_hour': ANYTHING(t_start_hour), 'avg_search_num_results': AVG(search_num_results), 'n_rows': COUNT()}) - JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) + JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'search_num_results': t1.search_num_results, 't_name': t0.t_name, 't_start_hour': t0.t_start_hour}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) 
SCAN(table=SEARCHES, columns={'search_num_results': search_num_results, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/join_region_nations.txt b/tests/test_plan_refsols/join_region_nations.txt index c319d0a25..192f6778a 100644 --- a/tests/test_plan_refsols/join_region_nations.txt +++ b/tests/test_plan_refsols/join_region_nations.txt @@ -1,4 +1,2 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/join_region_nations_customers.txt b/tests/test_plan_refsols/join_region_nations_customers.txt index 03148c27c..8f39f219c 100644 --- a/tests/test_plan_refsols/join_region_nations_customers.txt +++ b/tests/test_plan_refsols/join_region_nations_customers.txt @@ -1,6 +1,2 @@ ROOT(columns=[('key', c_custkey), ('name', c_name), ('address', c_address), ('nation_key', c_nationkey), ('phone', c_phone), ('account_balance', c_acctbal), ('market_segment', c_mktsegment), ('comment', c_comment)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_address': t1.c_address, 'c_comment': t1.c_comment, 'c_custkey': t1.c_custkey, 'c_mktsegment': t1.c_mktsegment, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'c_phone': t1.c_phone}) - JOIN(condition=t0.r_regionkey == 
t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments2.txt b/tests/test_plan_refsols/lineitem_regional_shipments2.txt index b79ec8de3..f90e87c44 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments2.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('rname', r_name), ('price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name == t1.r_name & t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'r_name': t0.r_name}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'r_name': t1.r_name}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 
'l_suppkey': l_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'r_name': t1.r_name}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index d8e5cd342..531abfce9 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_price', l_extendedprice)], orderings=[]) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index 
132a9adcc..b8ba05d61 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,11 +1,5 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) - JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) - FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) - PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) - AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) - AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 
0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0), columns={'month': month_1, 'year': year_1}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt index 84d08983f..e8baa1741 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': 
COUNT(s_suppkey)}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt index 6b901bb6d..55cdf7f5d 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', count_s_suppkey), ('total_suppliers', count_s_suppkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 2ab7abe26..6198ebc29 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & 
DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index 00be4afea..95593d4d1 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', cust_avg_shares)], orderings=[(sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType}) JOIN(condition=t0.sbTxCustId == 
t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index 2cd523e09..e2cf74fb0 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,19 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpTickerId': t1.sbDpTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 2efeb070b..ded2ae63f 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t1.sbTxShares < t0.cust_max_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.cust_ticker_max_shares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_max_shares': t0.cust_max_shares, 'cust_ticker_max_shares': t1.cust_ticker_max_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_max_shares': MAX(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/multi_partition_access_5.txt b/tests/test_plan_refsols/multi_partition_access_5.txt index 3356a5d74..da5336fa0 100644 --- a/tests/test_plan_refsols/multi_partition_access_5.txt +++ 
b/tests/test_plan_refsols/multi_partition_access_5.txt @@ -1,13 +1,9 @@ ROOT(columns=[('transaction_id', sbTxId), ('n_ticker_type_trans', n_ticker_type_trans), ('n_ticker_trans', sum_n_ticker_type_trans_1), ('n_type_trans', sum_n_ticker_type_trans)], orderings=[(n_ticker_type_trans):asc_first, (sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) - JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': 
t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxId': t1.sbTxId, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t0.sum_n_ticker_type_trans_1}) + JOIN(condition=t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans < 0.2:numeric & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans, 'sum_n_ticker_type_trans_1': t1.sum_n_ticker_type_trans}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t1.n_ticker_type_trans / t0.sum_n_ticker_type_trans > 0.8:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_n_ticker_type_trans': t0.sum_n_ticker_type_trans}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index e793349dc..871bb5b9f 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -1,47 
+1,17 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxId': t1.sbTxId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_ticker_type_trans == 1:numeric | t0.n_cust_type_trans == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_cust_type_trans': t1.n_cust_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_cust_type_trans > 1:numeric, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - 
AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_cust_type_trans': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == 
t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t0.n_ticker_type_trans, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxType': t1.sbTxType}) + JOIN(condition=t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxType': sbTxType}) - AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': SUM(n_ticker_type_trans)}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxType': t1.sbTxType}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'n_ticker_type_trans': COUNT()}) - SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) + AGGREGATE(keys={'sbTxType': sbTxType}, aggregations={'sum_n_ticker_type_trans': COUNT()}) + SCAN(table=main.sbTransaction, columns={'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_ticker_type_trans': t1.n_ticker_type_trans, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) FILTER(condition=sum_n_ticker_type_trans > 1:numeric, columns={'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxTickerId': sbTxTickerId}, aggregations={'sum_n_ticker_type_trans': COUNT()}) SCAN(table=main.sbTransaction, columns={'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_plan_refsols/nation_name_contains_region_name.txt b/tests/test_plan_refsols/nation_name_contains_region_name.txt index 2244b257e..9d3c189f4 100644 --- a/tests/test_plan_refsols/nation_name_contains_region_name.txt +++ b/tests/test_plan_refsols/nation_name_contains_region_name.txt @@ -1,4 +1,4 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey & CONTAINS(t1.n_name, t0.r_name), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/odate_and_rdate_avggap.txt b/tests/test_plan_refsols/odate_and_rdate_avggap.txt index 84a9d4f04..9606c3407 100644 --- a/tests/test_plan_refsols/odate_and_rdate_avggap.txt +++ b/tests/test_plan_refsols/odate_and_rdate_avggap.txt @@ -1,6 +1,6 @@ ROOT(columns=[('avg_gap', avg_gap)], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_gap': AVG(DATEDIFF('days':string, o_orderdate, SMALLEST(l_commitdate, l_receiptdate)))}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_commitdate': t0.l_commitdate, 'l_receiptdate': t0.l_receiptdate, 'o_orderdate': t1.o_orderdate}) FILTER(condition=l_shipmode == 'RAIL':string, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_shipmode': l_shipmode}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git 
a/tests/test_plan_refsols/order_by_before_join.txt b/tests/test_plan_refsols/order_by_before_join.txt index c319d0a25..192f6778a 100644 --- a/tests/test_plan_refsols/order_by_before_join.txt +++ b/tests/test_plan_refsols/order_by_before_join.txt @@ -1,4 +1,2 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index d7c5bd416..5e682b661 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -4,7 +4,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0, 5:numeric))], ord JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': 
ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index 9f476f893..25ca533af 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -8,7 +8,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_20, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == 
t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index e67d91867..f670d3451 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -24,7 +24,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_590, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 
'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index a55bcc47a..faf53451e 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -12,7 +12,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_58, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index cf9b305ab..523047895 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -16,7 +16,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_134, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': 
t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index 382de6bab..b8ae5bde8 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -20,7 +20,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_286, 5:numeric))], JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], 
order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index 8551440bf..91c7ce5f2 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -32,7 +32,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank_0_2414, 5:numeric))] JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank_1, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank_1': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) - JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) + JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) PROJECT(columns={'n': RELSIZE(args=[], partition=[], order=[]), 'page_rank': 1.0:numeric / RELSIZE(args=[], partition=[], order=[]), 's_key': s_key}) SCAN(table=main.SITES, 
columns={'s_key': s_key}) SCAN(table=main.LINKS, columns={'l_source': l_source, 'l_target': l_target}) diff --git a/tests/test_plan_refsols/part_cross_part_a.txt b/tests/test_plan_refsols/part_cross_part_a.txt index c4402eb78..90ec89d6c 100644 --- a/tests/test_plan_refsols/part_cross_part_a.txt +++ b/tests/test_plan_refsols/part_cross_part_a.txt @@ -7,7 +7,7 @@ ROOT(columns=[('state', sbCustState), ('exchange', sbTickerExchange), ('n', DEFA SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId, 'sbCustState': sbCustState}) AGGREGATE(keys={'sbCustId': sbCustId, 'sbTickerExchange': sbTickerExchange}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) + JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) diff --git a/tests/test_plan_refsols/part_cross_part_b.txt b/tests/test_plan_refsols/part_cross_part_b.txt index f32335bba..6524835a9 100644 --- a/tests/test_plan_refsols/part_cross_part_b.txt +++ b/tests/test_plan_refsols/part_cross_part_b.txt @@ -7,8 +7,8 @@ ROOT(columns=[('state', sbCustState), ('month_of_year', 
month), ('n', RELSUM(arg FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/part_cross_part_c.txt b/tests/test_plan_refsols/part_cross_part_c.txt index 9062336ea..9b061b966 100644 --- a/tests/test_plan_refsols/part_cross_part_c.txt +++ b/tests/test_plan_refsols/part_cross_part_c.txt @@ -8,8 +8,8 @@ ROOT(columns=[('state', sbCustState), ('max_n', max_n)], orderings=[]) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) 
AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/percentile_customers_per_region.txt b/tests/test_plan_refsols/percentile_customers_per_region.txt index 0000648c9..627b425c7 100644 --- a/tests/test_plan_refsols/percentile_customers_per_region.txt +++ b/tests/test_plan_refsols/percentile_customers_per_region.txt @@ -1,7 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(c_name):asc_first]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(c_acctbal):asc_last]) == 95:numeric & ENDSWITH(c_phone, '00':string), columns={'c_name': c_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/quantile_function_test_1.txt b/tests/test_plan_refsols/quantile_function_test_1.txt index c460cafe5..3472faf9c 100644 --- a/tests/test_plan_refsols/quantile_function_test_1.txt +++ b/tests/test_plan_refsols/quantile_function_test_1.txt @@ -1,6 +1,4 @@ ROOT(columns=[('seventieth_order_price', seventieth_order_price)], orderings=[]) AGGREGATE(keys={}, aggregations={'seventieth_order_price': QUANTILE(o_totalprice, 0.7:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_totalprice': t1.o_totalprice}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/rank_customers_per_region.txt b/tests/test_plan_refsols/rank_customers_per_region.txt index 272d3f0a6..b21f4c0b5 100644 --- 
a/tests/test_plan_refsols/rank_customers_per_region.txt +++ b/tests/test_plan_refsols/rank_customers_per_region.txt @@ -1,6 +1,4 @@ ROOT(columns=[('nation_name', n_name), ('name', c_name), ('cust_rank', RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=True, dense=True))], orderings=[]) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 60f1383e8..610f76d3a 100644 --- a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,7 +1,5 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/region_nation_window_aggs.txt b/tests/test_plan_refsols/region_nation_window_aggs.txt index 0f1fb8476..219f9e2a2 100644 --- a/tests/test_plan_refsols/region_nation_window_aggs.txt +++ b/tests/test_plan_refsols/region_nation_window_aggs.txt @@ -1,5 +1,3 @@ ROOT(columns=[('nation_name', n_name), ('key_sum', RELSUM(args=[n_nationkey], partition=[n_regionkey], order=[])), ('key_avg', RELAVG(args=[n_nationkey], partition=[n_regionkey], order=[])), ('n_short_comment', RELCOUNT(args=[KEEP_IF(n_comment, LENGTH(n_comment) < 75:numeric)], partition=[n_regionkey], order=[])), ('n_nations', RELSIZE(args=[], partition=[n_regionkey], order=[]))], orderings=[(n_regionkey):asc_first, (n_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t1.n_comment, 'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) + FILTER(condition=NOT(ISIN(SLICE(n_name, None:unknown, 1:numeric, None:unknown), ['A', 'E', 'I', 'O', 'U']:array[unknown])), columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index d968b9e59..93b25c240 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -2,9 +2,7 @@ ROOT(columns=[('name', s_name)], orderings=[]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, 
columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index 4113ecab2..bdf4299bd 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -2,8 +2,8 @@ ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0: JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) - JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_11.txt 
b/tests/test_plan_refsols/simple_cross_11.txt index 27cff2d37..4b7aa81fd 100644 --- a/tests/test_plan_refsols/simple_cross_11.txt +++ b/tests/test_plan_refsols/simple_cross_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + JOIN(condition=t0.o_orderdate == t1.min_date, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={}, aggregations={'min_date': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/simple_cross_2.txt b/tests/test_plan_refsols/simple_cross_2.txt index a5f30f5fa..bf441b987 100644 --- a/tests/test_plan_refsols/simple_cross_2.txt +++ b/tests/test_plan_refsols/simple_cross_2.txt @@ -1,4 +1,4 @@ ROOT(columns=[('r1', r_name), ('r2', r2)], orderings=[(r_name):asc_first, (r2):asc_first]) - JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r2': t1.r_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r2': t1.r_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index 0dd20b88a..59617616d 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -2,6 +2,6 @@ ROOT(columns=[('region_name', r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0: JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': 
t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_6.txt b/tests/test_plan_refsols/simple_cross_6.txt index 20c725da5..10bb19f6e 100644 --- a/tests/test_plan_refsols/simple_cross_6.txt +++ b/tests/test_plan_refsols/simple_cross_6.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_pairs', n_pairs)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_pairs': COUNT()}) - JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t1.o_custkey == t0.o_custkey & t1.o_orderdate == t0.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=INTEGER(SLICE(o_clerk, 6:numeric, 
None:unknown, None:unknown)) >= 900:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_7.txt b/tests/test_plan_refsols/simple_cross_7.txt index 78f1f71f4..69800f5c2 100644 --- a/tests/test_plan_refsols/simple_cross_7.txt +++ b/tests/test_plan_refsols/simple_cross_7.txt @@ -3,7 +3,7 @@ ROOT(columns=[('original_order_key', o_orderkey), ('n_other_orders', DEFAULT_TO( FILTER(condition=o_orderstatus == 'P':string, columns={'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.o_custkey & t0.o_orderdate == t1.o_orderdate & t1.o_orderkey > t0.o_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderkey': t0.o_orderkey}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) FILTER(condition=o_orderstatus == 'P':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/simple_cross_8.txt b/tests/test_plan_refsols/simple_cross_8.txt index b40fe6b0e..80804edb6 100644 --- a/tests/test_plan_refsols/simple_cross_8.txt +++ b/tests/test_plan_refsols/simple_cross_8.txt @@ -4,8 +4,8 @@ ROOT(columns=[('supplier_region', anything_supplier_region), ('customer_region', JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'key_2': t0.key_2, 'name_18': t1.r_name, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) - JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) + JOIN(condition=t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t0.key_2, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey, 'supplier_region': t0.supplier_region}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'key_2': t1.r_regionkey, 'r_name': t1.r_name, 'r_regionkey': t0.r_regionkey, 
'supplier_region': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_9.txt b/tests/test_plan_refsols/simple_cross_9.txt index 7a1abb072..715af5d11 100644 --- a/tests/test_plan_refsols/simple_cross_9.txt +++ b/tests/test_plan_refsols/simple_cross_9.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n1', n_name), ('n2', n2)], orderings=[(n_name):asc_first, (n2):asc_first], limit=10:numeric) - JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n2': t1.n_name, 'n_name': t0.n_name}) - JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.n_name != t1.n_name & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n2': t1.n_name, 'n_name': t0.n_name}) + JOIN(condition=t0.r_name == t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'r_regionkey': t1.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index f81eb8d36..b9a9772c7 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -5,7 +5,7 @@ 
ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 601a5a746..1f9d454fc 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -1,5 +1,5 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True) / RELSUM(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last], cumulative=True), 2:numeric)), ('pct_bought_change', ROUND(100.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) - 
PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_n_rows, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('pct_incident_change', ROUND(100.0:numeric * DEFAULT_TO(sum_expr_4, 0:numeric) - PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]) / PREV(args=[DEFAULT_TO(sum_expr_4, 0:numeric)], partition=[], order=[(year):asc_last]), 2:numeric)), ('bought', DEFAULT_TO(sum_n_rows, 0:numeric)), ('incidents', DEFAULT_TO(sum_expr_4, 0:numeric))], orderings=[(year - YEAR(release_date)):asc_first]) - JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) + JOIN(condition=YEAR(t0.release_date) <= t1.year_1, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'release_date': t0.release_date, 'sum_expr_4': t1.sum_expr_4, 'sum_n_rows': t1.sum_n_rows, 'year': t1.year_1}) AGGREGATE(keys={}, aggregations={'release_date': ANYTHING(pr_release)}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) @@ -10,7 +10,7 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) @@ -18,7 +18,7 @@ ROOT(columns=[('years_since_release', year - YEAR(release_date)), ('cum_ir', ROU SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index b1f19bec2..fd04ea420 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ 
b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -5,10 +5,10 @@ ROOT(columns=[('yr', year), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n_rows, JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index c3a94e9b4..447f85a3c 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,6 +1,6 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, 
t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 
'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index 05eafcbc8..8e0003f34 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -5,7 +5,7 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], o FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) diff --git 
a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index dbca0c73d..5c7637dd0 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,8 +1,8 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) - JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/tpch_q3.txt b/tests/test_plan_refsols/tpch_q3.txt index 0a7c9f97d..d7be30887 100644 --- a/tests/test_plan_refsols/tpch_q3.txt +++ 
b/tests/test_plan_refsols/tpch_q3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('L_ORDERKEY', l_orderkey), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('O_ORDERDATE', o_orderdate), ('O_SHIPPRIORITY', o_shippriority)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (o_orderdate):asc_first, (l_orderkey):asc_first], limit=10:numeric) AGGREGATE(keys={'l_orderkey': l_orderkey, 'o_orderdate': o_orderdate, 'o_shippriority': o_shippriority}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 'o_shippriority': t0.o_shippriority}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_shippriority': t0.o_shippriority}) FILTER(condition=o_orderdate < datetime.date(1995, 3, 15):datetime, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_shippriority': o_shippriority}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/tpch_q7.txt b/tests/test_plan_refsols/tpch_q7.txt index 813d42b01..2703970fa 100644 --- a/tests/test_plan_refsols/tpch_q7.txt +++ b/tests/test_plan_refsols/tpch_q7.txt @@ -1,7 +1,7 @@ ROOT(columns=[('SUPP_NATION', supp_nation), ('CUST_NATION', n_name), ('L_YEAR', 
l_year), ('REVENUE', DEFAULT_TO(sum_volume, 0:numeric))], orderings=[(supp_nation):asc_first, (n_name):asc_first, (l_year):asc_first]) AGGREGATE(keys={'l_year': YEAR(l_shipdate), 'n_name': n_name, 'supp_nation': supp_nation}, aggregations={'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey & t0.n_name == 'FRANCE':string & t1.n_name == 'GERMANY':string | t0.n_name == 'GERMANY':string & t1.n_name == 'FRANCE':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supp_nation': t0.n_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 
'l_orderkey': l_orderkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index f27de3c5c..f54d41956 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -1,12 +1,12 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0:numeric) / DEFAULT_TO(sum_volume, 0:numeric))], orderings=[]) AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': 
l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey}) FILTER(condition=ISIN(YEAR(o_orderdate), [1995, 1996]:array[unknown]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsize.txt b/tests/test_plan_refsols/window_sliding_frame_relsize.txt index c3c89419b..63846c131 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsize.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsize.txt @@ -1,4 +1,2 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w2', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-4, 0))), ('w3', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w6', RELSIZE(args=[], 
partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, -1))), ('w7', RELSIZE(args=[], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5))), ('w8', RELSIZE(args=[], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-3, 5)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId}) diff --git a/tests/test_plan_refsols/window_sliding_frame_relsum.txt b/tests/test_plan_refsols/window_sliding_frame_relsum.txt index dbbba0f08..967cacd49 100644 --- a/tests/test_plan_refsols/window_sliding_frame_relsum.txt +++ b/tests/test_plan_refsols/window_sliding_frame_relsum.txt @@ -1,4 +1,2 @@ ROOT(columns=[('transaction_id', sbTxId), ('w1', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w2', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, 4))), ('w3', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w4', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(0, None))), ('w5', RELSUM(args=[sbTxShares], partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w6', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(None, 1))), ('w7', RELSUM(args=[sbTxShares], 
partition=[], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1))), ('w8', RELSUM(args=[sbTxShares], partition=[sbTxCustId], order=[(sbTxDateTime):asc_last, (sbTxId):asc_last], frame=(-5, -1)))], orderings=[(sbTxDateTime):asc_first], limit=8:numeric) - JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxDateTime': t1.sbTxDateTime, 'sbTxId': t1.sbTxId, 'sbTxShares': t1.sbTxShares}) - SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares}) diff --git a/tests/test_pydough_from_string.py b/tests/test_pydough_from_string.py index 9919c38f3..1867b4f29 100644 --- a/tests/test_pydough_from_string.py +++ b/tests/test_pydough_from_string.py @@ -17,6 +17,7 @@ from tests.testing_utilities import graph_fetcher +@pytest.mark.execute @pytest.mark.parametrize( "pydough_code, answer_variable, env, answer", [ diff --git a/tests/test_sql_refsols/correl_36_sqlite.sql b/tests/test_sql_refsols/correl_36_sqlite.sql index 22d824e86..01635b227 100644 --- a/tests/test_sql_refsols/correl_36_sqlite.sql +++ b/tests/test_sql_refsols/correl_36_sqlite.sql @@ -1,16 +1,16 @@ -WITH _s3 AS ( +WITH _s1 AS ( SELECT p_partkey, p_type FROM tpch.part -), _s21 AS ( +), _t0 AS ( SELECT DISTINCT orders.o_orderkey AS key_12, lineitem.l_linenumber, lineitem.l_orderkey FROM tpch.lineitem AS lineitem - JOIN _s3 AS _s3 - ON _s3.p_partkey = lineitem.l_partkey + JOIN _s1 AS _s1 + ON _s1.p_partkey = lineitem.l_partkey JOIN tpch.supplier AS supplier ON lineitem.l_suppkey = supplier.s_suppkey JOIN tpch.orders AS orders @@ -44,20 +44,11 @@ WITH _s3 AS ( END = 1 AND CAST(STRFTIME('%Y', lineitem_2.l_shipdate) 
AS INTEGER) = 1997 AND lineitem_2.l_orderkey = orders_2.o_orderkey - JOIN _s3 AS _s19 - ON _s19.p_partkey = lineitem_2.l_partkey AND _s19.p_type = _s3.p_type + JOIN _s1 AS _s17 + ON _s1.p_type = _s17.p_type AND _s17.p_partkey = lineitem_2.l_partkey WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 ) SELECT COUNT(*) AS n -FROM tpch.lineitem AS lineitem -JOIN tpch.orders AS orders - ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 - AND lineitem.l_orderkey = orders.o_orderkey -JOIN _s21 AS _s21 - ON _s21.key_12 = orders.o_orderkey - AND _s21.l_linenumber = lineitem.l_linenumber - AND _s21.l_orderkey = lineitem.l_orderkey -WHERE - CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 +FROM _t0 diff --git a/tests/test_sql_refsols/quantile_test_1_ansi.sql b/tests/test_sql_refsols/quantile_test_1_ansi.sql index aa50858da..f06e8e049 100644 --- a/tests/test_sql_refsols/quantile_test_1_ansi.sql +++ b/tests/test_sql_refsols/quantile_test_1_ansi.sql @@ -1,7 +1,6 @@ SELECT PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY - orders.o_totalprice NULLS LAST) AS seventieth_order_price -FROM tpch.customer AS customer -JOIN tpch.orders AS orders - ON EXTRACT(YEAR FROM CAST(orders.o_orderdate AS DATETIME)) = 1998 - AND customer.c_custkey = orders.o_custkey + o_totalprice NULLS LAST) AS seventieth_order_price +FROM tpch.orders +WHERE + EXTRACT(YEAR FROM CAST(o_orderdate AS DATETIME)) = 1998 diff --git a/tests/test_sql_refsols/quantile_test_1_mysql.sql b/tests/test_sql_refsols/quantile_test_1_mysql.sql index 015388656..421c09607 100644 --- a/tests/test_sql_refsols/quantile_test_1_mysql.sql +++ b/tests/test_sql_refsols/quantile_test_1_mysql.sql @@ -1,14 +1,13 @@ WITH _t0 AS ( SELECT CASE - WHEN TRUNCATE(CAST(0.30000000000000004 * COUNT(ORDERS.o_totalprice) OVER () AS FLOAT), 0) < ROW_NUMBER() OVER (ORDER BY ORDERS.o_totalprice DESC) - THEN ORDERS.o_totalprice + WHEN TRUNCATE(CAST(0.30000000000000004 * COUNT(o_totalprice) OVER () AS FLOAT), 0) < ROW_NUMBER() 
OVER (ORDER BY o_totalprice DESC) + THEN o_totalprice ELSE NULL END AS expr_1 - FROM tpch.CUSTOMER AS CUSTOMER - JOIN tpch.ORDERS AS ORDERS - ON CUSTOMER.c_custkey = ORDERS.o_custkey - AND EXTRACT(YEAR FROM CAST(ORDERS.o_orderdate AS DATETIME)) = 1998 + FROM tpch.ORDERS + WHERE + EXTRACT(YEAR FROM CAST(o_orderdate AS DATETIME)) = 1998 ) SELECT MAX(expr_1) AS seventieth_order_price diff --git a/tests/test_sql_refsols/quantile_test_1_sqlite.sql b/tests/test_sql_refsols/quantile_test_1_sqlite.sql index 01fc46e7c..39fab677f 100644 --- a/tests/test_sql_refsols/quantile_test_1_sqlite.sql +++ b/tests/test_sql_refsols/quantile_test_1_sqlite.sql @@ -1,14 +1,13 @@ WITH _t0 AS ( SELECT CASE - WHEN CAST(0.30000000000000004 * COUNT(orders.o_totalprice) OVER () AS INTEGER) < ROW_NUMBER() OVER (ORDER BY orders.o_totalprice DESC) - THEN orders.o_totalprice + WHEN CAST(0.30000000000000004 * COUNT(o_totalprice) OVER () AS INTEGER) < ROW_NUMBER() OVER (ORDER BY o_totalprice DESC) + THEN o_totalprice ELSE NULL END AS expr_1 - FROM tpch.customer AS customer - JOIN tpch.orders AS orders - ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 - AND customer.c_custkey = orders.o_custkey + FROM tpch.orders + WHERE + CAST(STRFTIME('%Y', o_orderdate) AS INTEGER) = 1998 ) SELECT MAX(expr_1) AS seventieth_order_price diff --git a/tests/test_sql_refsols/window_functions_ansi.sql b/tests/test_sql_refsols/window_functions_ansi.sql index 4b1b37292..10721b5f4 100644 --- a/tests/test_sql_refsols/window_functions_ansi.sql +++ b/tests/test_sql_refsols/window_functions_ansi.sql @@ -8,8 +8,6 @@ SELECT customer.c_acctbal / AVG(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, customer.c_acctbal / COUNT(CASE WHEN customer.c_acctbal > 0.0 THEN customer.c_acctbal ELSE NULL END) OVER () AS relcount_value, customer.c_acctbal / COUNT(*) OVER () AS relsize_value -FROM tpch.region AS region -JOIN tpch.nation AS nation - ON 
nation.n_regionkey = region.r_regionkey +FROM tpch.nation AS nation JOIN tpch.customer AS customer ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/window_functions_mysql.sql b/tests/test_sql_refsols/window_functions_mysql.sql index 62661daa7..f51f418ee 100644 --- a/tests/test_sql_refsols/window_functions_mysql.sql +++ b/tests/test_sql_refsols/window_functions_mysql.sql @@ -8,8 +8,6 @@ SELECT CUSTOMER.c_acctbal / AVG(CUSTOMER.c_acctbal) OVER (ORDER BY CUSTOMER.c_acctbal ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, CUSTOMER.c_acctbal / COUNT(CASE WHEN CUSTOMER.c_acctbal > 0.0 THEN CUSTOMER.c_acctbal ELSE NULL END) OVER () AS relcount_value, CUSTOMER.c_acctbal / COUNT(*) OVER () AS relsize_value -FROM tpch.REGION AS REGION -JOIN tpch.NATION AS NATION - ON NATION.n_regionkey = REGION.r_regionkey +FROM tpch.NATION AS NATION JOIN tpch.CUSTOMER AS CUSTOMER ON CUSTOMER.c_nationkey = NATION.n_nationkey diff --git a/tests/test_sql_refsols/window_functions_sqlite.sql b/tests/test_sql_refsols/window_functions_sqlite.sql index 7c648b070..c60adccfd 100644 --- a/tests/test_sql_refsols/window_functions_sqlite.sql +++ b/tests/test_sql_refsols/window_functions_sqlite.sql @@ -8,8 +8,6 @@ SELECT CAST(customer.c_acctbal AS REAL) / AVG(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, CAST(customer.c_acctbal AS REAL) / COUNT(CASE WHEN customer.c_acctbal > 0.0 THEN customer.c_acctbal ELSE NULL END) OVER () AS relcount_value, CAST(customer.c_acctbal AS REAL) / COUNT(*) OVER () AS relsize_value -FROM tpch.region AS region -JOIN tpch.nation AS nation - ON nation.n_regionkey = region.r_regionkey +FROM tpch.nation AS nation JOIN tpch.customer AS customer ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql b/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql index 093cb0ab5..4c8a3bb8b 100644 --- 
a/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_ansi.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY 
sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql b/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql index e3ecc2587..7fb2f5a2f 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_mysql.sql @@ -1,16 +1,14 @@ SELECT - sbTransaction.sbtxid AS transaction_id, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE 
utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, - COUNT(*) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, - COUNT(*) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 -FROM main.sbCustomer AS sbCustomer -JOIN main.sbTransaction AS sbTransaction - ON sbCustomer.sbcustid = sbTransaction.sbtxcustid + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbTransaction ORDER BY - sbTransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql 
b/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql index 57e3d18ed..a7babc7d7 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_sqlite.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, - COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, - COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid 
ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql b/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql index 1c4e4da48..2db332255 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_ansi.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid 
ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime NULLS LAST, sbtransaction.sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime NULLS LAST, sbtxid NULLS LAST ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 +FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql 
b/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql index ff2f29e69..e6237d92e 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_mysql.sql @@ -1,16 +1,14 @@ SELECT - sbTransaction.sbtxid AS transaction_id, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, - SUM(sbTransaction.sbtxshares) OVER (ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, - SUM(sbTransaction.sbtxshares) OVER (PARTITION BY sbTransaction.sbtxcustid ORDER BY sbTransaction.sbtxdatetime, sbTransaction.sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 -FROM main.sbCustomer AS sbCustomer -JOIN main.sbTransaction AS sbTransaction - ON sbCustomer.sbcustid = sbTransaction.sbtxcustid + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY 
sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid COLLATE utf8mb4_bin ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 +FROM main.sbTransaction ORDER BY - sbTransaction.sbtxdatetime + sbtxdatetime LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql b/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql index af2442b96..ed9b5c93b 100644 --- a/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_sqlite.sql @@ -1,16 +1,14 @@ SELECT - sbtransaction.sbtxid AS transaction_id, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 
UNBOUNDED FOLLOWING) AS w3, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, - SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, - SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 -FROM main.sbcustomer AS sbcustomer -JOIN main.sbtransaction AS sbtransaction - ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 
+FROM main.sbtransaction ORDER BY - sbtransaction.sbtxdatetime + sbtxdatetime LIMIT 8 From 8f7fbbe29e03a85705cdd7534532321599bd0638 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 25 Aug 2025 16:01:07 -0400 Subject: [PATCH 89/97] Fixing bug [RUN CI] --- pydough/conversion/filter_pushdown.py | 1 - pydough/conversion/relational_converter.py | 25 +++++++++++++------ .../access_partition_child_after_filter.txt | 6 ++++- .../month_year_sliding_windows.txt | 14 ++++++++--- .../multi_partition_access_3.txt | 9 ++++--- .../test_plan_refsols/rank_with_filters_c.txt | 2 +- 6 files changed, 40 insertions(+), 17 deletions(-) diff --git a/pydough/conversion/filter_pushdown.py b/pydough/conversion/filter_pushdown.py index e247bd132..b77267bc7 100644 --- a/pydough/conversion/filter_pushdown.py +++ b/pydough/conversion/filter_pushdown.py @@ -248,7 +248,6 @@ def visit_join(self, join: Join) -> RelationalNode: cardinality = join.cardinality.add_filter() else: reverse_cardinality = reverse_cardinality.add_filter() - # Do the same pushable_filters = { expr.accept_shuttle(transposer) for expr in pushable_filters } diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index a751bfba4..25f47acba 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -1081,6 +1081,7 @@ def translate_partition_child( self, node: HybridPartitionChild, context: TranslationOutput | None, + preceding_hybrid: HybridTree | None, ) -> TranslationOutput: """ Converts a step into the child of a PARTITION node into a join between @@ -1092,6 +1093,8 @@ def translate_partition_child( `context`: the data structure storing information used by the conversion, such as bindings of already translated terms from preceding contexts. + `preceding_hybrid`: the previous layer in the hybrid tree above the + current level. 
Returns: The TranslationOutput payload containing expressions for both the @@ -1127,7 +1130,9 @@ def translate_partition_child( child_output, JoinType.INNER, JoinCardinality.PLURAL_FILTER, - JoinCardinality.SINGULAR_ACCESS, + JoinCardinality.SINGULAR_ACCESS + if preceding_hybrid is not None and preceding_hybrid.always_exists() + else JoinCardinality.SINGULAR_FILTER, join_keys, None, None, @@ -1305,7 +1310,11 @@ def rel_translation( else: result = self.build_simple_table_scan(operation) case HybridPartitionChild(): - result = self.translate_partition_child(operation, context) + result = self.translate_partition_child( + operation, + context, + preceding_hybrid[0] if preceding_hybrid is not None else None, + ) case HybridCalculate(): assert context is not None, "Malformed HybridTree pattern." result = self.translate_calculate(operation, context) @@ -1473,12 +1482,14 @@ def optimize_relational_tree( The optimized relational root. """ + pruner: ColumnPruner = ColumnPruner() + # Step 0: prune unused columns. This is done early to remove as many dead # names as possible so that steps that require generating column names can # use nicer names instead of generating nastier ones to avoid collisions. # It also speeds up all subsequent steps by reducing the total number of # objects inside the plan. - root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) # Step 1: push filters down as far as possible root = confirm_root(push_filters(root)) @@ -1500,10 +1511,10 @@ def optimize_relational_tree( root = confirm_root(merge_projects(root)) # Step 6: re-run column pruning after the various steps, which may have - # rendered more columns unused. This is done befre the next step to remove + # rendered more columns unused. This is done before the next step to remove # as many column names as possible so the column bubbling step can try to # use nicer names without worrying about collisions. 
- root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) # Step 7: bubble up names from the leaf nodes to further encourage simpler # naming without aliases, and also to delete duplicate columns where @@ -1524,7 +1535,7 @@ def optimize_relational_tree( root = confirm_root(pullup_projections(root)) simplify_expressions(root, additional_shuttles) root = confirm_root(push_filters(root)) - root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) # Step 9: re-run projection merging, without pushing into joins. This # will allow some redundant projections created by pullup to be removed @@ -1538,7 +1549,7 @@ def optimize_relational_tree( # Step 11: re-run column pruning one last time to remove any columns that # are no longer used after the final round of transformations. - root = ColumnPruner().prune_unused_columns(root) + root = pruner.prune_unused_columns(root) return root diff --git a/tests/test_plan_refsols/access_partition_child_after_filter.txt b/tests/test_plan_refsols/access_partition_child_after_filter.txt index e0889ae05..baeffc577 100644 --- a/tests/test_plan_refsols/access_partition_child_after_filter.txt +++ b/tests/test_plan_refsols/access_partition_child_after_filter.txt @@ -1,2 +1,6 @@ ROOT(columns=[('part_name', p_name), ('part_type', p_type), ('retail_price', p_retailprice)], orderings=[]) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) + JOIN(condition=t0.p_type == t1.p_type, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_type': t1.p_type}) + FILTER(condition=avg_p_retailprice > 27.5:numeric, columns={'p_type': p_type}) + AGGREGATE(keys={'p_type': p_type}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) + SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice, 'p_type': p_type}) + SCAN(table=tpch.PART, 
columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index b8ba05d61..d91f3ab37 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,5 +1,11 @@ ROOT(columns=[('year', year), ('month', month)], orderings=[(year):asc_first, (month):asc_first]) - FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_1):asc_last, (month_1):asc_last], default=0.0), columns={'month': month_1, 'year': year_1}) - AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) - SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year):asc_last, (month):asc_last], default=0.0), columns={'month': month, 'year': year}) + JOIN(condition=t0.year == t1.year_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t1.month_1, 'sum_o_totalprice': t1.sum_o_totalprice, 'year': t1.year_1}) + FILTER(condition=DEFAULT_TO(sum_month_total_spent, 0:numeric) > next_year_total_spent, columns={'year': year}) + 
PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_month_total_spent, 0:numeric)], partition=[], order=[(year):asc_last], default=0.0), 'sum_month_total_spent': sum_month_total_spent, 'year': year}) + AGGREGATE(keys={'year': YEAR(o_orderdate)}, aggregations={'sum_month_total_spent': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + AGGREGATE(keys={'month_1': MONTH(o_orderdate), 'year_1': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index e2cf74fb0..0274e95db 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,9 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) JOIN(condition=t1.sbDpClose < t0.type_high_price & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'type_high_price': MAX(sbDpClose)}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) - 
SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) + SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.ticker_high_price, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'ticker_high_price': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index f985be11c..f778ba043 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,6 +1,6 @@ ROOT(columns=[('pname', p_name), ('psize', size_3)], orderings=[]) FILTER(condition=RANKING(args=[], partition=[p_size], order=[(p_retailprice):desc_first]) == 1:numeric, columns={'p_name': p_name, 'size_3': size_3}) - JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': 
t0.p_size, 'size_3': t1.p_size}) + JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_3': t1.p_size}) LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) SCAN(table=tpch.PART, columns={'p_size': p_size}) From d797c439b4a5b4cfef381d4cfaed5d1666dd4b5c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 26 Aug 2025 10:54:52 -0400 Subject: [PATCH 90/97] Adjusting aggregation splitting to account for reverse cardinality [RUN CI] [RUN MYSQL] --- pydough/conversion/agg_split.py | 1 + tests/test_plan_refsols/common_prefix_af.txt | 16 ++++---- tests/test_plan_refsols/common_prefix_o.txt | 31 +++++++------- tests/test_plan_refsols/correl_14.txt | 8 ++-- tests/test_plan_refsols/correl_15.txt | 8 ++-- tests/test_plan_refsols/correl_35.txt | 22 +++++----- .../count_cust_supplier_nation_combos.txt | 32 +++++++-------- .../customers_sum_line_price.txt | 11 +++-- .../multi_partition_access_2.txt | 30 +++++++------- .../nations_sum_line_price.txt | 11 +++-- .../regions_sum_line_price.txt | 11 +++-- tests/test_plan_refsols/triple_partition.txt | 33 ++++++++------- tests/test_sql_refsols/correl_14_sqlite.sql | 11 ++--- tests/test_sql_refsols/correl_15_sqlite.sql | 11 ++--- tests/test_sql_refsols/correl_35_sqlite.sql | 40 +++++-------------- .../defog_broker_adv14_ansi.sql | 18 +++++++-- .../defog_broker_adv14_mysql.sql | 18 +++++++-- .../defog_broker_adv14_sqlite.sql | 22 +++++++--- 18 files changed, 165 insertions(+), 169 deletions(-) diff --git a/pydough/conversion/agg_split.py b/pydough/conversion/agg_split.py index bcbf82cf6..e4081861f 100644 --- a/pydough/conversion/agg_split.py +++ b/pydough/conversion/agg_split.py @@ -322,6 +322,7 @@ def attempt_join_aggregate_transpose( # if joining first will reduce the number of rows that get 
aggregated. if join.cardinality.filters: can_push_left = False + if join.reverse_cardinality.filters: can_push_right = False # If any of the aggregations to either side cannot be pushed down, then diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index e2a896fca..ce89a069c 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -1,15 +1,15 @@ -ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_c_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_c_name': t1.max_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) +ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_anything_c_name)], orderings=[(n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_anything_c_name': t1.max_anything_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_c_name': max_c_name, 'n_rows': n_rows}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_c_name': MAX(c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, 
columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_anything_c_name': max_anything_c_name, 'n_rows': n_rows}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_anything_c_name': MAX(anything_c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_c_name': t1.anything_c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_rows': t0.n_rows, 'o_custkey': t0.o_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 199fb1393..b4603c210 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,21 +1,20 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_n_rows, 
0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_small_parts': sum_sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_n_rows': sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t0.sum_n_rows, 'sum_sum_agg_5': t0.sum_sum_agg_5, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_agg_5': t1.sum_sum_agg_5, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'n_small_parts': sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 
'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_p_retailprice': sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t1.sum_agg_5, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_n_rows': sum_n_rows, 'sum_sum_agg_5': sum_sum_agg_5, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_5': SUM(sum_agg_5), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t0.n_rows, 's_acctbal': t1.s_acctbal, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'sum_agg_5': sum_agg_5, 'sum_p_retailprice': sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_5': SUM(agg_5), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t0.agg_5, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'agg_5': t1.agg_5, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + PROJECT(columns={'agg_5': 1:numeric, 'p_partkey': p_partkey}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index b8d16913a..5341118f8 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,11 +1,11 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': 
t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'supplier_avg_price': AVG(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 
d821dbbe7..0795bc81e 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,14 +1,14 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr_1 & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t0.sum_expr_1, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'sum_expr_1': t1.sum_expr_1, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 
'supplier_avg_price': t1.supplier_avg_price}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr_1': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'supplier_avg_price': AVG(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index 96ec04e08..9b491ce4d 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -10,15 +10,13 @@ ROOT(columns=[('n', n)], orderings=[]) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=sum_n_rows > 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 
'o_orderpriority': o_orderpriority, 'p_type': p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': 
o_orderpriority, 'p_type': p_type}, aggregations={}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=QUARTER(l_shipdate) == 1:numeric & YEAR(l_shipdate) == 1997:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt index 3c1b317ec..0b0bbd46e 100644 --- a/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt +++ b/tests/test_plan_refsols/count_cust_supplier_nation_combos.txt @@ -1,19 +1,15 @@ -ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', 
sum_sum_sum_sum_agg_0), ('total_value', DEFAULT_TO(sum_sum_sum_sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': year}, aggregations={'sum_sum_sum_sum_agg_0': SUM(sum_sum_sum_agg_0), 'sum_sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'sum_sum_sum_agg_0': t0.sum_sum_sum_agg_0, 'sum_sum_sum_sum_l_extendedprice': t0.sum_sum_sum_sum_l_extendedprice, 'supplier_nation': t1.n_name, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 's_nationkey': s_nationkey, 'year': year}, aggregations={'sum_sum_sum_agg_0': SUM(sum_sum_agg_0), 'sum_sum_sum_sum_l_extendedprice': SUM(sum_sum_sum_l_extendedprice)}) - JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey, 'sum_sum_agg_0': t0.sum_sum_agg_0, 'sum_sum_sum_l_extendedprice': t0.sum_sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'n_name': n_name, 'ps_suppkey': ps_suppkey, 'year': year}, aggregations={'sum_sum_agg_0': SUM(sum_agg_0), 'sum_sum_sum_l_extendedprice': SUM(sum_sum_l_extendedprice)}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'ps_suppkey': t1.ps_suppkey, 'sum_agg_0': t0.sum_agg_0, 'sum_sum_l_extendedprice': t0.sum_sum_l_extendedprice, 'year': t0.year}) - AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey, 'n_name': n_name, 'year': YEAR(o_orderdate)}, aggregations={'sum_agg_0': COUNT(), 'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, 
columns={'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'sum_l_extendedprice': t1.sum_l_extendedprice}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) +ROOT(columns=[('year', year), ('customer_nation', n_name), ('supplier_nation', supplier_nation), ('num_occurrences', n_rows), ('total_value', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + AGGREGATE(keys={'n_name': n_name, 'supplier_nation': supplier_nation, 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT(), 'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'supplier_nation': t1.n_name}) + JOIN(condition=t0.ps_suppkey 
== t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 's_nationkey': t1.s_nationkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_suppkey': t1.ps_suppkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/customers_sum_line_price.txt 
b/tests/test_plan_refsols/customers_sum_line_price.txt index 082a9e3e7..799b8abe5 100644 --- a/tests/test_plan_refsols/customers_sum_line_price.txt +++ b/tests/test_plan_refsols/customers_sum_line_price.txt @@ -1,8 +1,7 @@ -ROOT(columns=[('okey', c_custkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) +ROOT(columns=[('okey', c_custkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt 
b/tests/test_plan_refsols/multi_partition_access_2.txt index 95593d4d1..cad1d95b5 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,28 +1,30 @@ -ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', cust_avg_shares)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType}) - JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.cust_avg_shares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cust_avg_shares': t0.cust_avg_shares, 'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'cust_avg_shares': t1.cust_avg_shares, 'sbTxCustId': t0.sbTxCustId}) +ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', cus_tick_typ_avg_shares), ('cust_tick_avg_shares', sum_sum_sbTxShares_1 / sum_count_sbTxShares_1), ('cust_avg_shares', sum_sum_sbTxShares / sum_count_sbTxShares)], orderings=[(sbTxId):asc_first]) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': 
t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t1.sbTxShares < t0.cus_tick_typ_avg_shares & t1.sbTxShares < t0.sum_sum_sbTxShares / t0.sum_count_sbTxShares & t1.sbTxShares < t0.sum_sum_sbTxShares_1 / t0.sum_count_sbTxShares_1 & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'cus_tick_typ_avg_shares': t0.cus_tick_typ_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'cus_tick_typ_avg_shares': t1.cus_tick_typ_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t0.sum_count_sbTxShares_1, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t0.sum_sum_sbTxShares_1}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_count_sbTxShares': t0.sum_count_sbTxShares, 'sum_count_sbTxShares_1': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t0.sum_sum_sbTxShares, 'sum_sum_sbTxShares_1': t1.sum_sum_sbTxShares}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': 
t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'cust_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) + JOIN(condition=t0.customer_id_9 == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'customer_id_9': t1.sbTxCustId, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'cust_tick_avg_shares': t1.cust_tick_avg_shares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_count_sbTxShares': t1.sum_count_sbTxShares, 'sum_sum_sbTxShares': t1.sum_sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'cust_tick_avg_shares': AVG(sbTxShares)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbTxCustId': t0.sbTxCustId, 'sbTxShares': t1.sbTxShares, 'sbTxTickerId': t0.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'sum_count_sbTxShares': SUM(count_sbTxShares), 'sum_sum_sbTxShares': SUM(sum_sbTxShares)}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': t0.sbTxTickerId, 'sum_sbTxShares': t1.sum_sbTxShares}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': 
sbTxType}, aggregations={}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'count_sbTxShares': COUNT(sbTxShares), 'sum_sbTxShares': SUM(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}, aggregations={'cus_tick_typ_avg_shares': AVG(sbTxShares)}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId, 'sbTxType': sbTxType}) diff --git a/tests/test_plan_refsols/nations_sum_line_price.txt b/tests/test_plan_refsols/nations_sum_line_price.txt index 7ec5765c3..6c3ad7c6f 100644 --- a/tests/test_plan_refsols/nations_sum_line_price.txt +++ b/tests/test_plan_refsols/nations_sum_line_price.txt @@ -1,10 +1,9 @@ -ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_sum_l_extendedprice': t1.sum_sum_l_extendedprice}) +ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_l_extendedprice': 
t1.sum_l_extendedprice}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'l_extendedprice': t1.l_extendedprice}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/regions_sum_line_price.txt b/tests/test_plan_refsols/regions_sum_line_price.txt index a8f014f55..67a0c6401 100644 --- a/tests/test_plan_refsols/regions_sum_line_price.txt +++ b/tests/test_plan_refsols/regions_sum_line_price.txt @@ -1,12 +1,11 @@ -ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_sum_l_extendedprice': 
t1.sum_sum_l_extendedprice}) +ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_sum_l_extendedprice': SUM(sum_l_extendedprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_regionkey': t0.n_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) - SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': 
l_orderkey}) diff --git a/tests/test_plan_refsols/triple_partition.txt b/tests/test_plan_refsols/triple_partition.txt index 067b2808d..f59dd133f 100644 --- a/tests/test_plan_refsols/triple_partition.txt +++ b/tests/test_plan_refsols/triple_partition.txt @@ -1,23 +1,22 @@ ROOT(columns=[('region', supp_region), ('avgpct', avg_percentage)], orderings=[(supp_region):asc_first]) AGGREGATE(keys={'supp_region': supp_region}, aggregations={'avg_percentage': AVG(100.0:numeric * max_n_instances / sum_n_instances)}) AGGREGATE(keys={'r_name': r_name, 'supp_region': supp_region}, aggregations={'max_n_instances': MAX(n_instances), 'sum_n_instances': SUM(n_instances)}) - AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': SUM(n_instances)}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_instances': t0.n_instances, 'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) - AGGREGATE(keys={'o_custkey': o_custkey, 'p_type': p_type, 'r_name': r_name}, aggregations={'n_instances': COUNT()}) - JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) - 
FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + AGGREGATE(keys={'p_type': p_type, 'r_name': r_name, 'supp_region': supp_region}, aggregations={'n_instances': COUNT()}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_type': t0.p_type, 'r_name': t1.r_name, 'supp_region': t0.r_name}) + JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'p_type': t0.p_type, 'r_name': t0.r_name}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 'r_name': t1.r_name}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'p_type': t0.p_type}) + FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey, 'p_type': p_type}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_type': p_type}) + FILTER(condition=MONTH(l_shipdate) == 6:numeric & YEAR(l_shipdate) == 1992:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t1.r_name, 's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'r_name': t1.r_name}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git 
a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index 6e964f99b..2bf406ddb 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,13 +1,12 @@ WITH _s3 AS ( SELECT - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr_1, - SUM(part.p_retailprice) AS sum_p_retailprice, + AVG(part.p_retailprice) AS supplier_avg_price, partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - 3 + 2 ) SELECT COUNT(DISTINCT supplier.s_suppkey) AS n @@ -17,11 +16,9 @@ JOIN _s3 AS _s3 JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part - ON part.p_container = 'LG DRUM' + ON _s3.supplier_avg_price > part.p_retailprice + AND part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey - AND part.p_retailprice < ( - CAST(_s3.sum_p_retailprice AS REAL) / _s3.sum_expr_1 - ) AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 ) diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index f59429df8..668627c41 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -4,14 +4,13 @@ WITH _s0 AS ( FROM tpch.part ), _s5 AS ( SELECT - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr_1, - SUM(part.p_retailprice) AS sum_p_retailprice, + AVG(part.p_retailprice) AS supplier_avg_price, partsupp.ps_suppkey FROM tpch.partsupp AS partsupp JOIN tpch.part AS part ON part.p_partkey = partsupp.ps_partkey GROUP BY - 3 + 2 ) SELECT COUNT(DISTINCT supplier.s_suppkey) AS n @@ -23,11 +22,9 @@ JOIN _s5 AS _s5 JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey JOIN tpch.part AS part - ON part.p_container = 'LG DRUM' + ON _s5.supplier_avg_price > part.p_retailprice + AND part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey - AND 
part.p_retailprice < ( - CAST(_s5.sum_p_retailprice AS REAL) / _s5.sum_expr_1 - ) AND part.p_retailprice < ( _s0.global_avg_price * 0.85 ) diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index b1750bbd1..cfa10bec4 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -3,13 +3,12 @@ WITH _s1 AS ( p_partkey, p_type FROM tpch.part -), _s10 AS ( - SELECT - COUNT(*) AS n_rows, +), _s13 AS ( + SELECT DISTINCT customer.c_custkey, customer.c_nationkey, - lineitem.l_partkey, - orders.o_orderpriority + orders.o_orderpriority, + _s11.p_type FROM tpch.customer AS customer JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1997 @@ -31,26 +30,8 @@ WITH _s1 AS ( END = 1 AND CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1997 AND lineitem.l_orderkey = orders.o_orderkey - GROUP BY - 2, - 3, - 4, - 5 -), _t3 AS ( - SELECT - SUM(_s10.n_rows) AS sum_n_rows, - _s10.c_custkey, - _s10.c_nationkey, - _s10.o_orderpriority, - _s11.p_type - FROM _s10 AS _s10 JOIN _s1 AS _s11 - ON _s10.l_partkey = _s11.p_partkey - GROUP BY - 2, - 3, - 4, - 5 + ON _s11.p_partkey = lineitem.l_partkey ) SELECT COUNT(*) AS n @@ -62,11 +43,10 @@ JOIN tpch.supplier AS supplier JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 AND lineitem.l_orderkey = orders.o_orderkey -JOIN _t3 AS _t3 - ON _s1.p_type = _t3.p_type - AND _t3.c_custkey = orders.o_custkey - AND _t3.c_nationkey = supplier.s_nationkey - AND _t3.o_orderpriority = orders.o_orderpriority - AND _t3.sum_n_rows > 0 +JOIN _s13 AS _s13 + ON _s1.p_type = _s13.p_type + AND _s13.c_custkey = orders.o_custkey + AND _s13.c_nationkey = supplier.s_nationkey + AND _s13.o_orderpriority = orders.o_orderpriority WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 diff --git a/tests/test_sql_refsols/defog_broker_adv14_ansi.sql 
b/tests/test_sql_refsols/defog_broker_adv14_ansi.sql index f7e6196f3..a8afab917 100644 --- a/tests/test_sql_refsols/defog_broker_adv14_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv14_ansi.sql @@ -1,9 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(sbdpclose) AS count_sbdpclose, + SUM(sbdpclose) AS sum_sbdpclose, + sbdptickerid + FROM main.sbdailyprice + WHERE + DATEDIFF(CURRENT_TIMESTAMP(), CAST(sbdpdate AS DATETIME), DAY) <= 7 + GROUP BY + 3 +) SELECT sbticker.sbtickertype AS ticker_type, - AVG(sbdailyprice.sbdpclose) AS ACP + SUM(_s1.sum_sbdpclose) / SUM(_s1.count_sbdpclose) AS ACP FROM main.sbticker AS sbticker -JOIN main.sbdailyprice AS sbdailyprice - ON DATEDIFF(CURRENT_TIMESTAMP(), CAST(sbdailyprice.sbdpdate AS DATETIME), DAY) <= 7 - AND sbdailyprice.sbdptickerid = sbticker.sbtickerid +JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv14_mysql.sql b/tests/test_sql_refsols/defog_broker_adv14_mysql.sql index 2e6957884..4dee5aaa5 100644 --- a/tests/test_sql_refsols/defog_broker_adv14_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv14_mysql.sql @@ -1,9 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(sbdpclose) AS count_sbDpClose, + SUM(sbdpclose) AS sum_sbDpClose, + sbdptickerid AS sbDpTickerId + FROM main.sbDailyPrice + WHERE + DATEDIFF(CURRENT_TIMESTAMP(), sbdpdate) <= 7 + GROUP BY + 3 +) SELECT sbTicker.sbtickertype AS ticker_type, - AVG(sbDailyPrice.sbdpclose) AS ACP + SUM(_s1.sum_sbDpClose) / SUM(_s1.count_sbDpClose) AS ACP FROM main.sbTicker AS sbTicker -JOIN main.sbDailyPrice AS sbDailyPrice - ON DATEDIFF(CURRENT_TIMESTAMP(), sbDailyPrice.sbdpdate) <= 7 - AND sbDailyPrice.sbdptickerid = sbTicker.sbtickerid +JOIN _s1 AS _s1 + ON _s1.sbDpTickerId = sbTicker.sbtickerid GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql index 38cfb931c..164838cf8 100644 --- 
a/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv14_sqlite.sql @@ -1,11 +1,21 @@ +WITH _s1 AS ( + SELECT + COUNT(sbdpclose) AS count_sbdpclose, + SUM(sbdpclose) AS sum_sbdpclose, + sbdptickerid + FROM main.sbdailyprice + WHERE + CAST(( + JULIANDAY(DATE(DATETIME('now'), 'start of day')) - JULIANDAY(DATE(sbdpdate, 'start of day')) + ) AS INTEGER) <= 7 + GROUP BY + 3 +) SELECT sbticker.sbtickertype AS ticker_type, - AVG(sbdailyprice.sbdpclose) AS ACP + CAST(SUM(_s1.sum_sbdpclose) AS REAL) / SUM(_s1.count_sbdpclose) AS ACP FROM main.sbticker AS sbticker -JOIN main.sbdailyprice AS sbdailyprice - ON CAST(( - JULIANDAY(DATE(DATETIME('now'), 'start of day')) - JULIANDAY(DATE(sbdailyprice.sbdpdate, 'start of day')) - ) AS INTEGER) <= 7 - AND sbdailyprice.sbdptickerid = sbticker.sbtickerid +JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid GROUP BY 1 From 3041ac98e00c6bb547d88ff00bf297e7c2f66ef9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 26 Aug 2025 11:04:19 -0400 Subject: [PATCH 91/97] Stop printing cardinalities in plan files for semi/anti joins --- pydough/relational/relational_nodes/join.py | 2 ++ tests/test_plan_refsols/aggregate_anti.txt | 2 +- tests/test_plan_refsols/anti_aggregate.txt | 2 +- tests/test_plan_refsols/anti_aggregate_alternate.txt | 2 +- tests/test_plan_refsols/anti_singular.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_5.txt | 2 +- tests/test_plan_refsols/common_prefix_aa.txt | 2 +- tests/test_plan_refsols/common_prefix_ab.txt | 4 ++-- tests/test_plan_refsols/common_prefix_ac.txt | 2 +- tests/test_plan_refsols/common_prefix_z.txt | 2 +- tests/test_plan_refsols/correl_10.txt | 2 +- tests/test_plan_refsols/correl_4.txt | 2 +- tests/test_plan_refsols/correl_7.txt | 2 +- tests/test_plan_refsols/epoch_num_predawn_cold_war.txt | 2 +- tests/test_plan_refsols/multiple_has_hasnot.txt | 6 +++--- tests/test_plan_refsols/simple_anti_1.txt | 2 +- tests/test_plan_refsols/simple_anti_2.txt | 
2 +- tests/test_plan_refsols/simple_semi_1.txt | 2 +- tests/test_plan_refsols/simple_semi_2.txt | 2 +- tests/test_plan_refsols/singular_anti.txt | 2 +- tests/test_plan_refsols/supplier_pct_national_qty.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- tests/test_plan_refsols/tpch_q4.txt | 2 +- tests/test_plan_refsols/window_filter_order_10.txt | 2 +- 24 files changed, 28 insertions(+), 26 deletions(-) diff --git a/pydough/relational/relational_nodes/join.py b/pydough/relational/relational_nodes/join.py index 6c6a7a34d..a8401e73b 100644 --- a/pydough/relational/relational_nodes/join.py +++ b/pydough/relational/relational_nodes/join.py @@ -275,11 +275,13 @@ def to_string(self, compact: bool = False) -> str: cardinality_suffix: str = ( "" if self.cardinality == JoinCardinality.UNKNOWN_UNKNOWN + or self.join_type in (JoinType.SEMI, JoinType.ANTI) else f", cardinality={self.cardinality.name}" ) reverse_cardinality_suffix: str = ( "" if self.reverse_cardinality == JoinCardinality.UNKNOWN_UNKNOWN + or self.join_type in (JoinType.SEMI, JoinType.ANTI) else f", reverse_cardinality={self.reverse_cardinality.name}" ) return f"JOIN(condition={self.condition.to_string(compact)}, type={self.join_type.name}{cardinality_suffix}{reverse_cardinality_suffix}, columns={self.make_column_string(self.columns, compact)}{correl_suffix})" diff --git a/tests/test_plan_refsols/aggregate_anti.txt b/tests/test_plan_refsols/aggregate_anti.txt index d780bbcb5..c61c67b94 100644 --- a/tests/test_plan_refsols/aggregate_anti.txt +++ b/tests/test_plan_refsols/aggregate_anti.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate.txt b/tests/test_plan_refsols/anti_aggregate.txt index b5feeed30..c61c67b94 100644 --- a/tests/test_plan_refsols/anti_aggregate.txt +++ b/tests/test_plan_refsols/anti_aggregate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', None:unknown), ('sum_price_of_10parts', 0:numeric)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_aggregate_alternate.txt b/tests/test_plan_refsols/anti_aggregate_alternate.txt index 02ee0138c..c12bdd20e 100644 --- a/tests/test_plan_refsols/anti_aggregate_alternate.txt +++ b/tests/test_plan_refsols/anti_aggregate_alternate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', 0:numeric), ('avg_price_of_10parts', 0:numeric), ('sum_price_of_10parts', None:unknown)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) 
SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/anti_singular.txt b/tests/test_plan_refsols/anti_singular.txt index 117716d5e..da23cf491 100644 --- a/tests/test_plan_refsols/anti_singular.txt +++ b/tests/test_plan_refsols/anti_singular.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index 3feedb723..fb5de210c 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,5 +1,5 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, 
orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_aa.txt b/tests/test_plan_refsols/common_prefix_aa.txt index 6ae902b07..6c1032c23 100644 --- a/tests/test_plan_refsols/common_prefix_aa.txt +++ b/tests/test_plan_refsols/common_prefix_aa.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 323768976..14edc53f2 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ac.txt b/tests/test_plan_refsols/common_prefix_ac.txt index 6e860bc35..3e5a7d814 100644 --- a/tests/test_plan_refsols/common_prefix_ac.txt +++ b/tests/test_plan_refsols/common_prefix_ac.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 3b3b5fad6..05678bd68 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': 
t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_10.txt b/tests/test_plan_refsols/correl_10.txt index fcb05e48c..66d96bafc 100644 --- a/tests/test_plan_refsols/correl_10.txt +++ b/tests/test_plan_refsols/correl_10.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', None:unknown)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_4.txt b/tests/test_plan_refsols/correl_4.txt index 8f72568f4..e37bfac00 100644 --- a/tests/test_plan_refsols/correl_4.txt +++ b/tests/test_plan_refsols/correl_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.n_nationkey, 
type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t1.c_acctbal <= t0.smallest_bal + 5.0:numeric & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'smallest_bal': t0.smallest_bal}) diff --git a/tests/test_plan_refsols/correl_7.txt b/tests/test_plan_refsols/correl_7.txt index 7586ae463..d9e84b642 100644 --- a/tests/test_plan_refsols/correl_7.txt +++ b/tests/test_plan_refsols/correl_7.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', 0:numeric)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=ANTI, columns={'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) PROJECT(columns={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt index a3a22d030..ced52113b 100644 --- a/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt +++ b/tests/test_plan_refsols/epoch_num_predawn_cold_war.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_events', n_events)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_events': COUNT()}) - JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={}) + JOIN(condition=t0.ev_key == t1.ev_key, type=SEMI, columns={}) JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) FILTER(condition=t_name == 'Pre-Dawn':string, columns={'t_end_hour': t_end_hour, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/multiple_has_hasnot.txt b/tests/test_plan_refsols/multiple_has_hasnot.txt index ec80d0ec2..78b9d31fd 100644 --- a/tests/test_plan_refsols/multiple_has_hasnot.txt +++ b/tests/test_plan_refsols/multiple_has_hasnot.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', p_name)], orderings=[]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, columns={'p_name': t0.p_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=ANTI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=SEMI, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t0.ps_partkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/simple_anti_1.txt b/tests/test_plan_refsols/simple_anti_1.txt index 1b99d8c05..c00785e14 100644 --- a/tests/test_plan_refsols/simple_anti_1.txt +++ b/tests/test_plan_refsols/simple_anti_1.txt @@ -1,4 +1,4 @@ ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_anti_2.txt b/tests/test_plan_refsols/simple_anti_2.txt index 322e6b23c..b87256acc 100644 --- a/tests/test_plan_refsols/simple_anti_2.txt +++ b/tests/test_plan_refsols/simple_anti_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=ANTI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/simple_semi_1.txt b/tests/test_plan_refsols/simple_semi_1.txt index 98a13ec16..dd41107a3 100644 --- a/tests/test_plan_refsols/simple_semi_1.txt +++ b/tests/test_plan_refsols/simple_semi_1.txt @@ -1,4 +1,4 @@ 
ROOT(columns=[('name', c_name)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, columns={'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/simple_semi_2.txt b/tests/test_plan_refsols/simple_semi_2.txt index 146ebf45e..d52362ab1 100644 --- a/tests/test_plan_refsols/simple_semi_2.txt +++ b/tests/test_plan_refsols/simple_semi_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name)], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=SEMI, columns={'s_name': t0.s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/singular_anti.txt b/tests/test_plan_refsols/singular_anti.txt index 7de3c3eec..0f3e69223 100644 --- a/tests/test_plan_refsols/singular_anti.txt +++ b/tests/test_plan_refsols/singular_anti.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('region_name', None:unknown)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=ANTI, columns={'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) FILTER(condition=r_name != 'ASIA':string, 
columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index 58a14443e..f2a5c7f3c 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,7 +1,7 @@ ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/tpch_q21.txt 
b/tests/test_plan_refsols/tpch_q21.txt index 8e0003f34..e15d1773c 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -5,7 +5,7 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], o FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'anything_l_suppkey': anything_l_suppkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_l_suppkey': t0.anything_l_suppkey}) + JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.o_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) diff --git a/tests/test_plan_refsols/tpch_q4.txt b/tests/test_plan_refsols/tpch_q4.txt index c2fe0f9a6..3ef651d9a 100644 --- a/tests/test_plan_refsols/tpch_q4.txt +++ b/tests/test_plan_refsols/tpch_q4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('O_ORDERPRIORITY', o_orderpriority), ('ORDER_COUNT', ORDER_COUNT)], 
orderings=[(o_orderpriority):asc_first]) AGGREGATE(keys={'o_orderpriority': o_orderpriority}, aggregations={'ORDER_COUNT': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, columns={'o_orderpriority': t0.o_orderpriority}) FILTER(condition=QUARTER(o_orderdate) == 3:numeric & YEAR(o_orderdate) == 1993:numeric, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) FILTER(condition=l_commitdate < l_receiptdate, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/window_filter_order_10.txt b/tests/test_plan_refsols/window_filter_order_10.txt index dad30206d..d3aa9b4ff 100644 --- a/tests/test_plan_refsols/window_filter_order_10.txt +++ b/tests/test_plan_refsols/window_filter_order_10.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=o_totalprice < 0.05:numeric * RELAVG(args=[None:unknown], partition=[], order=[]), columns={}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=ANTI, columns={'o_totalprice': t0.o_totalprice}) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) From efa133562465b1d0be09fb779d6e01f78201828a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 26 Aug 2025 11:25:48 -0400 Subject: [PATCH 92/97] 
Revisions and documentation [RUN CI] [RUN MYSQL] --- pydough/conversion/hybrid_tree.py | 101 ++++++++++++------ pydough/conversion/relational_converter.py | 23 ++-- tests/test_plan_refsols/agg_max_ranking.txt | 2 +- .../agg_orders_by_year_month_just_europe.txt | 2 +- .../agg_orders_by_year_month_vs_europe.txt | 2 +- .../aggregate_mixed_levels_simple.txt | 2 +- .../aggregate_on_function_call.txt | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 2 +- .../aggregate_then_backref.txt | 2 +- .../aggregation_analytics_2.txt | 2 +- .../aggregation_analytics_3.txt | 2 +- .../test_plan_refsols/avg_acctbal_wo_debt.txt | 2 +- .../avg_order_diff_per_customer.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_1.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_2.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_3.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_4.txt | 2 +- tests/test_plan_refsols/bad_child_reuse_5.txt | 2 +- tests/test_plan_refsols/common_prefix_a.txt | 4 +- tests/test_plan_refsols/common_prefix_ad.txt | 6 +- tests/test_plan_refsols/common_prefix_ae.txt | 4 +- tests/test_plan_refsols/common_prefix_af.txt | 4 +- tests/test_plan_refsols/common_prefix_al.txt | 6 +- tests/test_plan_refsols/common_prefix_am.txt | 4 +- tests/test_plan_refsols/common_prefix_an.txt | 6 +- tests/test_plan_refsols/common_prefix_ao.txt | 6 +- tests/test_plan_refsols/common_prefix_ap.txt | 2 +- tests/test_plan_refsols/common_prefix_aq.txt | 8 +- tests/test_plan_refsols/common_prefix_b.txt | 6 +- tests/test_plan_refsols/common_prefix_c.txt | 10 +- tests/test_plan_refsols/common_prefix_d.txt | 12 +-- tests/test_plan_refsols/common_prefix_e.txt | 4 +- tests/test_plan_refsols/common_prefix_f.txt | 6 +- tests/test_plan_refsols/common_prefix_g.txt | 6 +- tests/test_plan_refsols/common_prefix_h.txt | 10 +- tests/test_plan_refsols/common_prefix_i.txt | 4 +- tests/test_plan_refsols/common_prefix_l.txt | 4 +- tests/test_plan_refsols/common_prefix_m.txt | 4 +- 
tests/test_plan_refsols/common_prefix_n.txt | 4 +- tests/test_plan_refsols/common_prefix_o.txt | 4 +- tests/test_plan_refsols/common_prefix_p.txt | 4 +- tests/test_plan_refsols/common_prefix_q.txt | 2 +- tests/test_plan_refsols/common_prefix_r.txt | 2 +- tests/test_plan_refsols/common_prefix_s.txt | 4 +- tests/test_plan_refsols/common_prefix_t.txt | 4 +- tests/test_plan_refsols/common_prefix_u.txt | 4 +- tests/test_plan_refsols/common_prefix_x.txt | 4 +- tests/test_plan_refsols/common_prefix_y.txt | 4 +- tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_13.txt | 2 +- tests/test_plan_refsols/correl_14.txt | 2 +- tests/test_plan_refsols/correl_15.txt | 2 +- tests/test_plan_refsols/correl_2.txt | 2 +- tests/test_plan_refsols/correl_29.txt | 6 +- tests/test_plan_refsols/correl_30.txt | 4 +- tests/test_plan_refsols/correl_6.txt | 2 +- ...count_at_most_100_suppliers_per_nation.txt | 2 +- ...multiple_subcollections_alongside_aggs.txt | 4 +- .../count_single_subcollection.txt | 2 +- .../customer_largest_order_deltas.txt | 4 +- .../customer_most_recent_orders.txt | 2 +- .../customers_sum_line_price.txt | 2 +- .../test_plan_refsols/deep_best_analysis.txt | 4 +- .../epoch_users_most_cold_war_searches.txt | 2 +- .../first_order_per_customer.txt | 2 +- tests/test_plan_refsols/hour_minute_day.txt | 2 +- ...lineitems_access_cust_supplier_nations.txt | 2 +- .../lines_german_supplier_economy_part.txt | 2 +- .../mostly_positive_accounts_per_nation1.txt | 4 +- .../mostly_positive_accounts_per_nation2.txt | 4 +- .../mostly_positive_accounts_per_nation3.txt | 4 +- ...ple_simple_aggregations_multiple_calcs.txt | 4 +- ...ltiple_simple_aggregations_single_calc.txt | 4 +- .../nation_acctbal_breakdown.txt | 2 +- tests/test_plan_refsols/nation_best_order.txt | 2 +- .../nations_order_by_num_suppliers.txt | 2 +- .../nations_sum_line_price.txt | 2 +- .../num_positive_accounts_per_nation.txt | 4 +- .../orders_sum_line_price.txt | 2 +- 
.../orders_sum_vs_count_line_price.txt | 2 +- .../orders_versus_first_orders.txt | 2 +- .../parts_quantity_increase_95_96.txt | 4 +- .../quantile_function_test_2.txt | 2 +- .../quantile_function_test_3.txt | 2 +- .../quantile_function_test_4.txt | 2 +- .../rank_nations_per_region_by_customers.txt | 2 +- .../region_acctbal_breakdown.txt | 2 +- .../region_orders_from_nations_richest.txt | 2 +- .../regional_first_order_best_line_part.txt | 2 +- .../regional_suppliers_percentile.txt | 2 +- .../regions_sum_line_price.txt | 2 +- tests/test_plan_refsols/semi_aggregate.txt | 2 +- tests/test_plan_refsols/simple_var_std.txt | 2 +- tests/test_plan_refsols/singular1.txt | 2 +- tests/test_plan_refsols/singular2.txt | 2 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 2 +- tests/test_plan_refsols/singular6.txt | 2 +- tests/test_plan_refsols/singular7.txt | 6 +- .../sqlite_udf_count_epsilon.txt | 2 +- .../sqlite_udf_covar_pop.txt | 2 +- tests/test_plan_refsols/sqlite_udf_nested.txt | 2 +- .../sqlite_udf_percent_positive.txt | 4 +- .../test_plan_refsols/supplier_best_part.txt | 4 +- .../supplier_pct_national_qty.txt | 2 +- ...ograph_battery_failure_rates_anomalies.txt | 2 +- .../technograph_country_cartesian_oddball.txt | 2 +- ...chnograph_country_combination_analysis.txt | 6 +- ...nograph_country_incident_rate_analysis.txt | 12 +-- ...aph_error_percentages_sun_set_by_error.txt | 4 +- ..._error_rate_sun_set_by_factory_country.txt | 4 +- .../technograph_incident_rate_per_brand.txt | 2 +- .../technograph_most_unreliable_products.txt | 4 +- ...top_5_nations_balance_by_num_suppliers.txt | 2 +- .../top_5_nations_by_num_supplierss.txt | 2 +- .../top_customers_by_orders.txt | 2 +- tests/test_plan_refsols/tpch_q10.txt | 2 +- tests/test_plan_refsols/tpch_q13.txt | 2 +- tests/test_plan_refsols/tpch_q15.txt | 4 +- tests/test_plan_refsols/tpch_q18.txt | 4 +- tests/test_plan_refsols/tpch_q20.txt | 4 +- tests/test_plan_refsols/tpch_q21.txt | 2 +- 
tests/test_plan_refsols/tpch_q22.txt | 2 +- tests/test_plan_refsols/tpch_q8.txt | 2 +- tests/test_plan_refsols/tpch_q9.txt | 2 +- .../various_aggfuncs_simple.txt | 2 +- .../window_filter_order_1.txt | 2 +- .../window_filter_order_2.txt | 2 +- .../window_filter_order_3.txt | 2 +- .../window_filter_order_4.txt | 2 +- .../window_filter_order_8.txt | 2 +- .../window_filter_order_9.txt | 2 +- .../aggregation_functions_ansi.sql | 36 +++---- .../aggregation_functions_mysql.sql | 36 +++---- .../aggregation_functions_sqlite.sql | 36 +++---- .../defog_broker_basic4_ansi.sql | 31 ++---- .../defog_broker_basic4_mysql.sql | 31 ++---- .../defog_broker_basic4_sqlite.sql | 31 ++---- 138 files changed, 350 insertions(+), 387 deletions(-) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index 9567d30f9..13094ea8a 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -613,41 +613,82 @@ def add_child( # Return the index of the newly created child. return new_child_idx + @staticmethod + def infer_metadata_reverse_cardinality( + metadata: SubcollectionRelationshipMetadata, + ) -> JoinCardinality: + """ + Infers the cardinality of the reverse of a join from parent to child + based on the metadata from the parent->child relationship. + + Args: + `metadata`: the metadata for the sub-collection property mapping + the parent to the child. + + Returns: + The join cardinality for the connection from the child back to the + parent, if it can be inferred. Uses `PLURAL_FILTER` as a fallback. + """ + # If there is no reverse, fall back to plural filter. + if ( + not isinstance(metadata, ReversiblePropertyMetadata) + or metadata.reverse is None + ): + return JoinCardinality.PLURAL_FILTER + + # If the reverse property exists, use its properties to + # infer if the reverse cardinality is singular or plural + # and whether a match always exists or not. 
+ cardinality: JoinCardinality = ( + JoinCardinality.PLURAL_ACCESS + if metadata.reverse.is_plural + else JoinCardinality.SINGULAR_ACCESS + ) + if not metadata.reverse.always_matches: + cardinality = cardinality.add_filter() + return cardinality + def infer_root_reverse_cardinality(self) -> JoinCardinality: """ - TODO + Infers the cardinality of the join connecting the root of the hybrid + tree to its parent context. + + Returns: + The inferred cardinality of the join connecting the root of the + hybrid tree to its parent context. """ - if self.parent is None: - match self.pipeline[0]: - case HybridRoot(): - return JoinCardinality.PLURAL_ACCESS - case HybridCollectionAccess(): - cardinality: JoinCardinality = JoinCardinality.PLURAL_ACCESS - if isinstance(self.pipeline[0].collection, SubCollection): - metadata = self.pipeline[0].collection.subcollection_property - if ( - isinstance(metadata, ReversiblePropertyMetadata) - and metadata.reverse is not None - ): - if metadata.reverse.is_plural: - cardinality = JoinCardinality.PLURAL_ACCESS - else: - cardinality = JoinCardinality.SINGULAR_ACCESS - if not metadata.reverse.always_matches: - cardinality = cardinality.add_filter() - return JoinCardinality.PLURAL_ACCESS - return cardinality - case HybridPartition(): - return self.children[0].subtree.infer_root_reverse_cardinality() - case HybridPartitionChild(): - return self.pipeline[0].subtree.infer_root_reverse_cardinality() - case _: - raise NotImplementedError( - f"Invalid start of pipeline: {self.pipeline[0].__class__.__name__}" - ) - else: + # Keep traversing upward until we find the root of the current tree. + if self.parent is not None: return self.parent.infer_root_reverse_cardinality() + # Once we find the root, infer the cardinality of the join that would + # connect just this node to the parent context. 
The rest of the nodes in + # the tree don't matter since they will not affect how many matches + # there are back to the parent context, or whether there is always a + # match or not for each record in the current context. + match self.pipeline[0]: + case HybridRoot(): + return JoinCardinality.PLURAL_ACCESS + case HybridCollectionAccess(): + # For collection accesses, that are not a sub-collection, just + # use plural access. If they are a sub-collection, infer what + # is the cardinality based on the reverse property. + if isinstance(self.pipeline[0].collection, SubCollection): + return self.infer_metadata_reverse_cardinality( + self.pipeline[0].collection.subcollection_property + ) + else: + return JoinCardinality.PLURAL_ACCESS + # For partition & partition child, infer from the underlying child. + case HybridPartition(): + return self.children[0].subtree.infer_root_reverse_cardinality() + case HybridPartitionChild(): + return self.pipeline[0].subtree.infer_root_reverse_cardinality() + case _: + raise NotImplementedError( + f"Invalid start of pipeline: {self.pipeline[0].__class__.__name__}" + ) + def add_successor(self, successor: "HybridTree") -> None: """ Marks two hybrid trees in a predecessor-successor relationship. diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 25f47acba..bb480cc27 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -18,7 +18,6 @@ SimpleJoinMetadata, SimpleTableMetadata, ) -from pydough.metadata.properties import ReversiblePropertyMetadata from pydough.qdag import ( Calculate, CollectionAccess, @@ -847,22 +846,14 @@ def translate_sub_collection( ) # Infer the cardinality of the join from the perspective of the new - # collection to the existing data. - reverse_cardinality: JoinCardinality - if ( - isinstance( - collection_access.subcollection_property, ReversiblePropertyMetadata + # collection to the existing data. 
Also, if the parent has any + # additional filters on its side that means a row may not always + # exist, then update the reverse cardinality since it may be filtering. + reverse_cardinality: JoinCardinality = ( + HybridTree.infer_metadata_reverse_cardinality( + collection_access.subcollection_property ) - and collection_access.subcollection_property.reverse is not None - ): - if collection_access.subcollection_property.reverse.is_plural: - reverse_cardinality = JoinCardinality.PLURAL_ACCESS - else: - reverse_cardinality = JoinCardinality.SINGULAR_ACCESS - if not collection_access.subcollection_property.reverse.always_matches: - reverse_cardinality = reverse_cardinality.add_filter() - else: - reverse_cardinality = JoinCardinality.PLURAL_ACCESS + ) if (not reverse_cardinality.filters) and (not parent.always_exists()): reverse_cardinality = reverse_cardinality.add_filter() diff --git a/tests/test_plan_refsols/agg_max_ranking.txt b/tests/test_plan_refsols/agg_max_ranking.txt index d73080147..69663c452 100644 --- a/tests/test_plan_refsols/agg_max_ranking.txt +++ b/tests/test_plan_refsols/agg_max_ranking.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('highest_rank', highest_rank)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'highest_rank': t1.highest_rank, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'highest_rank': MAX(cust_rank)}) PROJECT(columns={'c_nationkey': c_nationkey, 'cust_rank': RANKING(args=[], partition=[], order=[(c_acctbal):desc_first], allow_ties=True)}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt 
b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt index 5a9021e4e..1462eb43f 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_just_europe.txt @@ -3,7 +3,7 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', DEFAULT_ AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) diff --git a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt index 1bc99c0d2..48b873c76 100644 --- a/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt +++ b/tests/test_plan_refsols/agg_orders_by_year_month_vs_europe.txt @@ -3,7 +3,7 @@ ROOT(columns=[('year', year), ('month', month), ('num_european_orders', n_rows), AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month': MONTH(o_orderdate), 'year': 
YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_orderdate': t0.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t1.n_regionkey}) diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index 81d20edcc..0b267e0e1 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_key', o_orderkey), ('max_ratio', max_ratio)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_ratio': t1.max_ratio, 'o_orderkey': t0.o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': 
t1.ps_availqty}) diff --git a/tests/test_plan_refsols/aggregate_on_function_call.txt b/tests/test_plan_refsols/aggregate_on_function_call.txt index d28ac7f15..3bb9b5dab 100644 --- a/tests/test_plan_refsols/aggregate_on_function_call.txt +++ b/tests/test_plan_refsols/aggregate_on_function_call.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('avg_consumer_value', avg_consumer_value)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_consumer_value': t1.avg_consumer_value, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_consumer_value': MAX(IFF(c_acctbal < 0.0:numeric, 0.0:numeric, c_acctbal))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 3f4f8df2b..5cebaf11b 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_p_retailprice': t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': 
t1.avg_p_retailprice_1, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice_1': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_then_backref.txt b/tests/test_plan_refsols/aggregate_then_backref.txt index c3fd73955..7c8d13cc8 100644 --- a/tests/test_plan_refsols/aggregate_then_backref.txt +++ b/tests/test_plan_refsols/aggregate_then_backref.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_key', l_partkey), ('supplier_key', l_suppkey), ('order_key', l_orderkey), ('order_quantity_ratio', l_quantity / DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[]) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git 
a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index 2ef53d9c2..b719a64e1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 339aaf5d5..f29c91858 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), 
('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) diff --git a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt index 1dbe56459..3eea3dc6d 100644 --- a/tests/test_plan_refsols/avg_acctbal_wo_debt.txt +++ b/tests/test_plan_refsols/avg_acctbal_wo_debt.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('avg_bal_without_debt_erasure', sum_sum_expr_1 / sum_count_expr_1)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'r_name': t0.r_name, 'sum_count_expr_1': t1.sum_count_expr_1, 'sum_sum_expr_1': t1.sum_sum_expr_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_count_expr_1': SUM(count_expr_1), 'sum_sum_expr_1': SUM(sum_expr_1)}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_expr_1': t1.count_expr_1, 'n_regionkey': t0.n_regionkey, 'sum_expr_1': t1.sum_expr_1}) diff --git a/tests/test_plan_refsols/avg_order_diff_per_customer.txt b/tests/test_plan_refsols/avg_order_diff_per_customer.txt index e1703a4fb..edd73115c 100644 --- a/tests/test_plan_refsols/avg_order_diff_per_customer.txt +++ b/tests/test_plan_refsols/avg_order_diff_per_customer.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('avg_diff', avg_diff)], orderings=[(avg_diff):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_diff': t1.avg_diff, 'c_name': t0.c_name}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff 
--git a/tests/test_plan_refsols/bad_child_reuse_1.txt b/tests/test_plan_refsols/bad_child_reuse_1.txt index 2e05042d5..ada5bf42b 100644 --- a/tests/test_plan_refsols/bad_child_reuse_1.txt +++ b/tests/test_plan_refsols/bad_child_reuse_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_2.txt b/tests/test_plan_refsols/bad_child_reuse_2.txt index 452929916..ea3ef915c 100644 --- a/tests/test_plan_refsols/bad_child_reuse_2.txt +++ b/tests/test_plan_refsols/bad_child_reuse_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == 
t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_3.txt b/tests/test_plan_refsols/bad_child_reuse_3.txt index 452929916..ea3ef915c 100644 --- a/tests/test_plan_refsols/bad_child_reuse_3.txt +++ b/tests/test_plan_refsols/bad_child_reuse_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_cust', n_cust)], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': n_cust, 'n_rows': n_rows}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_cust': RELSIZE(args=[], partition=[c_nationkey], order=[]), 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) 
AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_4.txt b/tests/test_plan_refsols/bad_child_reuse_4.txt index edcdf502e..cdd0319db 100644 --- a/tests/test_plan_refsols/bad_child_reuse_4.txt +++ b/tests/test_plan_refsols/bad_child_reuse_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last], limit=10:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/bad_child_reuse_5.txt b/tests/test_plan_refsols/bad_child_reuse_5.txt index fb5de210c..3bcfa3875 100644 --- a/tests/test_plan_refsols/bad_child_reuse_5.txt +++ b/tests/test_plan_refsols/bad_child_reuse_5.txt @@ -1,7 +1,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(c_acctbal):desc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=ANTI, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': 
t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_acctbal):desc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_a.txt b/tests/test_plan_refsols/common_prefix_a.txt index afee2ad7c..d7614e2d9 100644 --- a/tests/test_plan_refsols/common_prefix_a.txt +++ b/tests/test_plan_refsols/common_prefix_a.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey 
== t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 7803949a3..33cb44ec2 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('part_qty', ps_availqty), ('qty_shipped', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(s_name):asc_first]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 
'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ae.txt b/tests/test_plan_refsols/common_prefix_ae.txt index 32e124483..b52d1aa50 100644 --- a/tests/test_plan_refsols/common_prefix_ae.txt +++ b/tests/test_plan_refsols/common_prefix_ae.txt @@ -1,11 +1,11 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_customers), ('customer_name', customer_name)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'customer_name': t1.customer_name, 'n_customers': 
t1.n_customers, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'customer_name': t1.customer_name, 'n_customers': t1.n_customers, 'n_name': t0.n_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'customer_name': MAX(c_name), 'n_customers': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t1.c_name, 'c_nationkey': t0.c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_custkey': t0.o_custkey}) FILTER(condition=ISIN(o_orderkey, [1070368, 1347104, 1472135, 2351457]:array[unknown]), columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_af.txt b/tests/test_plan_refsols/common_prefix_af.txt index ce89a069c..79fe346e2 100644 --- a/tests/test_plan_refsols/common_prefix_af.txt +++ b/tests/test_plan_refsols/common_prefix_af.txt @@ -1,12 +1,12 @@ ROOT(columns=[('nation_name', n_name), ('n_customers', n_rows), ('customer_name', max_anything_c_name)], orderings=[(n_name):asc_first]) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_anything_c_name': t1.max_anything_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_anything_c_name': t1.max_anything_c_name, 'n_name': t0.n_name, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'max_anything_c_name': max_anything_c_name, 'n_rows': n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'max_anything_c_name': MAX(anything_c_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_c_name': t1.anything_c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_c_name': t1.anything_c_name, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'n_rows': COUNT()}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 
'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_al.txt b/tests/test_plan_refsols/common_prefix_al.txt index f674f8512..6f0d546f6 100644 --- a/tests/test_plan_refsols/common_prefix_al.txt +++ b/tests/test_plan_refsols/common_prefix_al.txt @@ -1,9 +1,9 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', n_rows_1)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t0.n_rows_1}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'n_rows_1': n_rows_1}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': 
t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_am.txt b/tests/test_plan_refsols/common_prefix_am.txt index eb6b60f1f..a6a1c2691 100644 --- a/tests/test_plan_refsols/common_prefix_am.txt +++ b/tests/test_plan_refsols/common_prefix_am.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows_1, 0:numeric)), ('n_no_tax_discount', n_rows)], orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'n_rows_1': t0.n_rows}) LIMIT(limit=10:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]), columns={'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff 
--git a/tests/test_plan_refsols/common_prefix_an.txt b/tests/test_plan_refsols/common_prefix_an.txt index cfab97d0b..98beec996 100644 --- a/tests/test_plan_refsols/common_prefix_an.txt +++ b/tests/test_plan_refsols/common_prefix_an.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric)), ('n_no_tax_discount', agg_1)], orderings=[(c_custkey):asc_first]) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[c_nationkey], order=[]) & DEFAULT_TO(sum_n_rows, 0:numeric) > 0:numeric & sum_n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=50:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'n_rows': n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey}) @@ -10,7 +10,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric FILTER(condition=l_discount == 0:numeric & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_orderkey': l_orderkey, 'l_tax': l_tax}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ao.txt b/tests/test_plan_refsols/common_prefix_ao.txt index 508b413ff..aff3ce517 100644 --- a/tests/test_plan_refsols/common_prefix_ao.txt +++ b/tests/test_plan_refsols/common_prefix_ao.txt @@ -1,8 +1,8 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric)), ('n_no_tax_discount', DEFAULT_TO(n_rows, 0:numeric)), ('n_part_purchases', sum_n_rows)], orderings=[(c_custkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(agg_1, 0:numeric) > RELAVG(args=[DEFAULT_TO(agg_1, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={'agg_1': agg_1, 'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t0.n_rows, 'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t0.sum_n_rows}) LIMIT(limit=20:numeric, columns={'c_custkey': c_custkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}, orderings=[(c_custkey):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) LIMIT(limit=35:numeric, columns={'c_custkey': c_custkey}, orderings=[(c_custkey):asc_first]) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) @@ -11,7 +11,7 @@ ROOT(columns=[('cust_key', c_custkey), ('n_orders', DEFAULT_TO(agg_1, 0:numeric) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 7f58c39d0..18f665258 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -1,5 +1,5 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quantity', ps_availqty), ('supplier_nation', n_name)], orderings=[(p_name):asc_first]) - JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.p_partkey == t1.ps_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'p_name': t0.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 944f37615..32e6086dd 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ 
b/tests/test_plan_refsols/common_prefix_aq.txt @@ -1,13 +1,13 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier', s_name), ('best_part', p_name), ('best_quantity', ps_availqty)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_b.txt b/tests/test_plan_refsols/common_prefix_b.txt index 74c69984a..0dde64141 100644 --- a/tests/test_plan_refsols/common_prefix_b.txt +++ b/tests/test_plan_refsols/common_prefix_b.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', n_customers), ('n_suppliers', n_suppliers)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) + 
JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_nations': t1.n_nations, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_nations': COUNT(), 'n_suppliers': SUM(n_suppliers)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 26411f439..73d643b77 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 
0:numeric)), ('n_parts', n_parts)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_parts': t1.sum_agg_22, 'n_suppliers': t1.sum_sum_expr_18, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_22': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_18': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': 
t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index c29d975b2..34be4b842 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,14 +1,14 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_customers', sum_n_rows), ('n_suppliers', n_suppliers), ('n_orders_94', DEFAULT_TO(sum_sum_expr_7, 0:numeric)), ('n_orders_95', 
DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_1, 'n_suppliers': t1.sum_agg_29, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_expr_7': t1.sum_sum_expr_7, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_1': COUNT(), 'sum_agg_29': SUM(agg_29), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_expr_7': SUM(sum_expr_7), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_29': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 
'sum_expr_10': t0.sum_expr_10, 'sum_expr_7': t0.sum_expr_7, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_10': t1.sum_expr_10, 'sum_expr_7': t1.sum_expr_7, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_e.txt b/tests/test_plan_refsols/common_prefix_e.txt index a6acb8f40..48a7f6383 100644 --- a/tests/test_plan_refsols/common_prefix_e.txt +++ b/tests/test_plan_refsols/common_prefix_e.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_f.txt b/tests/test_plan_refsols/common_prefix_f.txt index f47fe1ec5..27d6f82b4 100644 
--- a/tests/test_plan_refsols/common_prefix_f.txt +++ b/tests/test_plan_refsols/common_prefix_f.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_nations', sum_agg_1), ('n_suppliers', sum_agg_8)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'r_name': t0.r_name, 'sum_agg_1': t1.sum_agg_1, 'sum_agg_8': t1.sum_agg_8}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'sum_agg_1': COUNT(), 'sum_agg_8': SUM(agg_8)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_8': t1.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) 
SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_g.txt b/tests/test_plan_refsols/common_prefix_g.txt index 3db1c35e4..a79c04f7d 100644 --- a/tests/test_plan_refsols/common_prefix_g.txt +++ b/tests/test_plan_refsols/common_prefix_g.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', r_name), ('n_customers', n_customers), ('n_suppliers', n_suppliers), ('n_nations', sum_agg_2)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_customers': t1.n_customers, 'n_suppliers': t1.n_suppliers, 'r_name': t0.r_name, 'sum_agg_2': t1.sum_agg_2}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_customers': SUM(n_rows), 'n_suppliers': SUM(n_suppliers), 'sum_agg_2': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'n_suppliers': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': 
t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index 97ab075cc..e13efa856 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,17 +1,17 @@ ROOT(columns=[('name', r_name), ('n_nations', n_nations), ('n_orders', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('n_customers', sum_n_rows), ('n_parts', sum_agg_22_1), ('n_suppliers', sum_sum_expr_18_1)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nations': t1.sum_agg_0, 'r_name': t0.r_name, 'sum_agg_22_1': t1.sum_agg_22_1, 'sum_n_rows': t1.sum_n_rows_1, 'sum_sum_expr_18_1': t1.sum_sum_expr_18_1, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_agg_0': COUNT(), 'sum_agg_22_1': SUM(agg_22), 'sum_n_rows_1': SUM(n_rows), 'sum_sum_expr_18_1': SUM(sum_expr_18), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 
'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_22': t1.sum_n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_18': t1.sum_expr_18, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'sum_expr_18': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.PARTSUPP, columns={'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_i.txt b/tests/test_plan_refsols/common_prefix_i.txt index fdb64f108..cf31b1b01 100644 --- a/tests/test_plan_refsols/common_prefix_i.txt +++ b/tests/test_plan_refsols/common_prefix_i.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', n_name), ('n_customers', n_rows), ('n_selected_orders', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(n_rows):desc_last, (n_name):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'c_nationkey': c_nationkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(o_orderdate) == 12:numeric & YEAR(o_orderdate) == 1992:numeric & o_clerk == 'Clerk#000000272':string, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index c03ee50fe..b44e64f3d 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,13 +1,13 @@ ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index 1ec0ef2aa..580ad9af1 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ -2,10 +2,10 @@ ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 
'c_nationkey': c_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_rows >= 5:numeric, 
columns={'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index e7b6f51be..b43c9a861 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_agg_11, 0:numeric))], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_agg_11': sum_agg_11, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': 
t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t0.sum_agg_11, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_11': t1.sum_agg_11, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_agg_11': SUM(agg_11), 'sum_p_retailprice': SUM(p_retailprice)}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index b4603c210..7e1c73858 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', n_small_parts)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'n_small_parts': sum_agg_5, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t1.sum_agg_5, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t0.sum_agg_5, 'sum_p_retailprice': t0.sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_agg_5': t1.sum_agg_5, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=QUARTER(o_orderdate) == 4:numeric & YEAR(o_orderdate) == 1996:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=sum_agg_5 > 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'sum_agg_5': sum_agg_5, 'sum_p_retailprice': sum_p_retailprice}) diff --git a/tests/test_plan_refsols/common_prefix_p.txt b/tests/test_plan_refsols/common_prefix_p.txt index 17bd44462..da57de9e5 100644 --- a/tests/test_plan_refsols/common_prefix_p.txt +++ b/tests/test_plan_refsols/common_prefix_p.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows), 
('n_parts_ordered', n_rows_1), ('n_distinct_parts', ndistinct_l_partkey)], orderings=[(ndistinct_l_partkey / n_rows_1):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows, 'ndistinct_l_partkey': t1.ndistinct_l_partkey}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index f08e39969..1adcbc62c 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric)), ('line_price', max_l_extendedprice), ('part_name', max_p_name)], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': 
t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_l_extendedprice': t1.max_l_extendedprice, 'max_p_name': t1.max_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_l_extendedprice': MAX(l_extendedprice), 'max_p_name': MAX(p_name), 'sum_o_totalprice': SUM(o_totalprice)}) JOIN(condition=t0.o_orderkey == t1.o_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey, 'o_totalprice': t0.o_totalprice, 'p_name': t1.p_name}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index f024a6127..2f9ad6dc7 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price', max_anything_l_extendedprice), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'max_anything_l_extendedprice': t1.max_anything_l_extendedprice, 'max_anything_p_name': t1.max_anything_p_name, 'sum_o_totalprice': 
t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'max_anything_l_extendedprice': max_anything_l_extendedprice, 'max_anything_p_name': max_anything_p_name, 'o_custkey': o_custkey, 'sum_o_totalprice': sum_o_totalprice}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_anything_l_extendedprice': MAX(anything_l_extendedprice), 'max_anything_p_name': MAX(anything_p_name), 'sum_n_rows': SUM(n_rows), 'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 2dbcd5430..736fcb4c2 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_recent_order_total', n_rows), ('most_recent_order_distinct', ndistinct_l_suppkey)], orderings=[(c_name):asc_first]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 
'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_t.txt b/tests/test_plan_refsols/common_prefix_t.txt index 873834947..1b9a18e15 100644 --- a/tests/test_plan_refsols/common_prefix_t.txt +++ b/tests/test_plan_refsols/common_prefix_t.txt @@ -1,12 +1,12 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) 
JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'INDIA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_u.txt b/tests/test_plan_refsols/common_prefix_u.txt index 1a4c5e05e..5129a5674 100644 --- a/tests/test_plan_refsols/common_prefix_u.txt +++ b/tests/test_plan_refsols/common_prefix_u.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:numeric))], orderings=[(DEFAULT_TO(sum_sum_l_quantity, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': 
t1.sum_sum_l_quantity}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_sum_l_quantity': t1.sum_sum_l_quantity}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name, 'c_nationkey': c_nationkey}) @@ -7,7 +7,7 @@ ROOT(columns=[('name', c_name), ('total_qty', DEFAULT_TO(sum_sum_l_quantity, 0:n SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=sum_n_rows > 0:numeric, columns={'o_custkey': o_custkey, 'sum_sum_l_quantity': sum_sum_l_quantity}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_sum_l_quantity': SUM(sum_l_quantity)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=l_shipmode == 'RAIL':string & l_tax == 0:numeric, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/common_prefix_x.txt b/tests/test_plan_refsols/common_prefix_x.txt index 6283b3bc5..e1afb10cf 100644 
--- a/tests/test_plan_refsols/common_prefix_x.txt +++ b/tests/test_plan_refsols/common_prefix_x.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', n_rows)], orderings=[(n_rows):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=sum_n_rows > 0:numeric, columns={'n_rows': n_rows, 'o_custkey': o_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/common_prefix_y.txt b/tests/test_plan_refsols/common_prefix_y.txt index 2bff6eeda..a5ae2d504 100644 --- a/tests/test_plan_refsols/common_prefix_y.txt +++ b/tests/test_plan_refsols/common_prefix_y.txt @@ -1,9 +1,9 @@ ROOT(columns=[('name', c_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_name):asc_first], limit=5:numeric) FILTER(condition=DEFAULT_TO(sum_n_rows, 0:numeric) == 0:numeric, columns={'c_name': c_name, 'n_rows': n_rows}) 
- JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'o_custkey': t0.o_custkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=l_tax == 0:numeric, columns={'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 71b6a0111..0c3125585 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_13.txt b/tests/test_plan_refsols/correl_13.txt index 5a738e623..10bc69870 100644 --- a/tests/test_plan_refsols/correl_13.txt +++ b/tests/test_plan_refsols/correl_13.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=s_nationkey <= 3:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 5341118f8..257f6b39f 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': 
t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'s_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'supplier_avg_price': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 0795bc81e..1277ca812 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': NDISTINCT(s_suppkey)}) JOIN(condition=t1.p_retailprice < t0.global_avg_price * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.supplier_avg_price & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 'ps_partkey': t1.ps_partkey, 'ps_supplycost': t1.ps_supplycost, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t0.supplier_avg_price}) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t0.s_suppkey, 'supplier_avg_price': t1.supplier_avg_price}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'global_avg_price': t0.global_avg_price, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'global_avg_price': AVG(p_retailprice)}) SCAN(table=tpch.PART, columns={'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/correl_2.txt b/tests/test_plan_refsols/correl_2.txt index 7bfc095a3..bf42b8cd6 100644 --- a/tests/test_plan_refsols/correl_2.txt +++ b/tests/test_plan_refsols/correl_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('n_selected_custs', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(n_name):asc_first]) - JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) + JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown)) == t1.expr_1 & t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_rows': t1.n_rows}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name}) FILTER(condition=NOT(STARTSWITH(r_name, 'A':string)), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/correl_29.txt b/tests/test_plan_refsols/correl_29.txt index 78b655da0..fe5b94824 100644 --- a/tests/test_plan_refsols/correl_29.txt +++ b/tests/test_plan_refsols/correl_29.txt @@ -1,10 
+1,10 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_rows_1), ('min_cust_acctbal', min_c_acctbal), ('max_cust_acctbal', max_c_acctbal)], orderings=[(anything_n_regionkey):asc_first, (anything_n_name):asc_first]) JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t0.max_c_acctbal, 'min_c_acctbal': t0.min_c_acctbal, 'n_rows': t0.n_rows, 'n_rows_1': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n_name': t0.anything_n_name, 'anything_n_regionkey': t0.anything_n_regionkey, 'max_c_acctbal': t1.max_c_acctbal, 'min_c_acctbal': t1.min_c_acctbal, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows}) FILTER(condition=ISIN(anything_n_regionkey, [1, 3]:array[unknown]), columns={'anything_n_name': anything_n_name, 'anything_n_regionkey': anything_n_regionkey, 'n_nationkey': n_nationkey, 'n_rows': n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - 
JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -13,7 +13,7 @@ ROOT(columns=[('region_key', anything_n_regionkey), ('nation_name', anything_n_n SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_30.txt 
b/tests/test_plan_refsols/correl_30.txt index cda02872f..27cb4106c 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -3,7 +3,7 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_region_name': ANYTHING(LOWER(r_name)), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_cust_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t0.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_cust_acctbal': t1.avg_cust_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_cust_acctbal': AVG(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) @@ -13,7 +13,7 @@ ROOT(columns=[('region_name', anything_region_name), ('nation_name', anything_n_ AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_supp_acctbal, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t0.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_supp_acctbal': t1.avg_supp_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_supp_acctbal': AVG(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/correl_6.txt b/tests/test_plan_refsols/correl_6.txt index 6f798d580..3b711b4d7 100644 --- a/tests/test_plan_refsols/correl_6.txt +++ b/tests/test_plan_refsols/correl_6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_prefix_nations', n_rows)], orderings=[]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': 
SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt index 38615974e..8106d43b0 100644 --- a/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt +++ b/tests/test_plan_refsols/count_at_most_100_suppliers_per_nation.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('n_top_suppliers', DEFAULT_TO(count_s_suppkey, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) LIMIT(limit=100:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}, orderings=[(s_acctbal):asc_last]) diff --git a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt index 223617b10..7f2716e17 100644 --- a/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt +++ b/tests/test_plan_refsols/count_multiple_subcollections_alongside_aggs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', n_rows), ('num_suppliers', agg_3), ('customer_to_supplier_wealth_ratio', DEFAULT_TO(sum_c_acctbal, 0:numeric) / DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_3': t1.n_rows, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/count_single_subcollection.txt b/tests/test_plan_refsols/count_single_subcollection.txt index 2934bd9b0..c815fa837 100644 --- a/tests/test_plan_refsols/count_single_subcollection.txt +++ b/tests/test_plan_refsols/count_single_subcollection.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_nationkey), ('num_customers', num_customers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': t1.num_customers}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'num_customers': 
t1.num_customers}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'num_customers': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/customer_largest_order_deltas.txt b/tests/test_plan_refsols/customer_largest_order_deltas.txt index f147342e9..0d15bedd4 100644 --- a/tests/test_plan_refsols/customer_largest_order_deltas.txt +++ b/tests/test_plan_refsols/customer_largest_order_deltas.txt @@ -1,11 +1,11 @@ ROOT(columns=[('name', c_name), ('largest_diff', IFF(ABS(min_diff) > max_diff, min_diff, max_diff))], orderings=[(IFF(ABS(min_diff) > max_diff, min_diff, max_diff)):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'max_diff': t1.max_diff, 'min_diff': t1.min_diff}) FILTER(condition=c_mktsegment == 'AUTOMOBILE':string, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'max_diff': MAX(revenue_delta), 'min_diff': MIN(revenue_delta)}) PROJECT(columns={'o_custkey': o_custkey, 'revenue_delta': DEFAULT_TO(sum_r, 0:numeric) - PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])}) FILTER(condition=PRESENT(PREV(args=[DEFAULT_TO(sum_r, 0:numeric)], partition=[o_custkey], order=[(o_orderdate):asc_last])), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'sum_r': sum_r}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate, 'sum_r': t1.sum_r}) FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_r': SUM(l_extendedprice * 1:numeric - l_discount)}) diff --git a/tests/test_plan_refsols/customer_most_recent_orders.txt b/tests/test_plan_refsols/customer_most_recent_orders.txt index c9265fae6..0fe2509df 100644 --- a/tests/test_plan_refsols/customer_most_recent_orders.txt +++ b/tests/test_plan_refsols/customer_most_recent_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('total_recent_value', DEFAULT_TO(sum_o_totalprice, 0:numeric))], orderings=[(DEFAULT_TO(sum_o_totalprice, 0:numeric)):desc_last], limit=3:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'sum_o_totalprice': t1.sum_o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) <= 5:numeric, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/customers_sum_line_price.txt 
b/tests/test_plan_refsols/customers_sum_line_price.txt index 799b8abe5..38916a55c 100644 --- a/tests/test_plan_refsols/customers_sum_line_price.txt +++ b/tests/test_plan_refsols/customers_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', c_custkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index add80975e..a0c1f8f96 100644 --- a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,8 +1,8 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 
's_suppkey': t0.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.r_regionkey == t1.r_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': 
t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt index 3115b76b7..55851831c 100644 --- a/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt +++ b/tests/test_plan_refsols/epoch_users_most_cold_war_searches.txt @@ -1,5 +1,5 @@ ROOT(columns=[('user_name', user_name), ('n_cold_war_searches', n_cold_war_searches)], orderings=[(n_cold_war_searches):desc_last, (user_name):asc_first], limit=3:numeric) - JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) + JOIN(condition=t0.user_id == t1.anything_search_user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_cold_war_searches': t1.n_cold_war_searches, 'user_name': t0.user_name}) SCAN(table=USERS, columns={'user_id': user_id, 'user_name': user_name}) AGGREGATE(keys={'anything_search_user_id': anything_search_user_id}, aggregations={'n_cold_war_searches': COUNT()}) AGGREGATE(keys={'search_id': search_id}, aggregations={'anything_search_user_id': ANYTHING(search_user_id)}) diff --git a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index 3842761d5..a445de391 100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order_price', o_totalprice)], orderings=[(o_totalprice):desc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/hour_minute_day.txt b/tests/test_plan_refsols/hour_minute_day.txt index 294688d84..dd12ad5a3 100644 --- a/tests/test_plan_refsols/hour_minute_day.txt +++ b/tests/test_plan_refsols/hour_minute_day.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId), ('_expr0', HOUR(sbTxDateTime)), ('_expr1', MINUTE(sbTxDateTime)), ('_expr2', SECOND(sbTxDateTime))], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) + JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbTxDateTime': t0.sbTxDateTime, 'sbTxId': t0.sbTxId}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime, 'sbTxId': sbTxId, 'sbTxTickerId': sbTxTickerId}) FILTER(condition=ISIN(sbTickerSymbol, ['AAPL', 'GOOGL', 'NFLX']:array[unknown]), columns={'sbTickerId': sbTickerId}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerSymbol': sbTickerSymbol}) diff --git 
a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt index 589daa0bc..a013e6502 100644 --- a/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt +++ b/tests/test_plan_refsols/lineitems_access_cust_supplier_nations.txt @@ -1,6 +1,6 @@ ROOT(columns=[('ship_year', YEAR(l_shipdate)), ('supplier_nation', supplier_nation), ('customer_nation', n_name), ('value', l_extendedprice * 1.0:numeric - l_discount)], orderings=[]) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name, 'supplier_nation': t0.n_name}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate, 'n_name': t1.n_name}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 
'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt index 531abfce9..1987fec13 100644 --- a/tests/test_plan_refsols/lines_german_supplier_economy_part.txt +++ b/tests/test_plan_refsols/lines_german_supplier_economy_part.txt @@ -1,6 +1,6 @@ ROOT(columns=[('order_key', l_orderkey), ('ship_date', l_shipdate), ('extended_price', l_extendedprice)], orderings=[]) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_shipdate': t0.l_shipdate}) - JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_shipdate': t0.l_shipdate, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 's_nationkey': t1.s_nationkey}) diff 
--git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt index e8baa1741..75ee4ba1f 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt index 55cdf7f5d..ae366e0cd 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', 
count_s_suppkey), ('total_suppliers', count_s_suppkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.count_s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt index 6198ebc29..f5bf7c1f4 100644 --- a/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt +++ b/tests/test_plan_refsols/mostly_positive_accounts_per_nation3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & DEFAULT_TO(t0.count_s_suppkey, 0:numeric) > 0.5:numeric * t1.total_suppliers, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt index 18e86d4d9..cc5f9cfd0 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_multiple_calcs.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('total_consumer_value', DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('total_supplier_value', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('avg_consumer_value', avg_c_acctbal), ('avg_supplier_value', avg_s_acctbal), ('best_consumer_value', max_c_acctbal), ('best_supplier_value', max_s_acctbal)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'avg_s_acctbal': t1.avg_s_acctbal_1, 'max_c_acctbal': t0.max_c_acctbal, 'max_s_acctbal': t1.max_s_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal_1': AVG(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt index a835cca09..94e0925e0 100644 --- a/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt +++ b/tests/test_plan_refsols/multiple_simple_aggregations_single_calc.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', n_nationkey), ('consumer_value', 
DEFAULT_TO(sum_c_acctbal, 0:numeric)), ('producer_value', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t0.sum_c_acctbal, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_c_acctbal': t1.sum_c_acctbal}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_c_acctbal': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/nation_acctbal_breakdown.txt b/tests/test_plan_refsols/nation_acctbal_breakdown.txt index 23594226f..baee60be6 100644 --- a/tests/test_plan_refsols/nation_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/nation_acctbal_breakdown.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'median_black_acctbal': 
t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_name': t0.n_name, 'n_red_acctbal': t1.n_red_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AMERICA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index a204db9ef..cacdcf7ea 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', o_orderkey), ('order_value', o_totalprice), ('value_percentage', value_percentage)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': t1.value_percentage}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t1.c_name, 'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'value_percentage': 
t1.value_percentage}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt index 81fcffc76..4742eb831 100644 --- a/tests/test_plan_refsols/nations_order_by_num_suppliers.txt +++ b/tests/test_plan_refsols/nations_order_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/nations_sum_line_price.txt b/tests/test_plan_refsols/nations_sum_line_price.txt index 6c3ad7c6f..1e4b3221c 100644 --- a/tests/test_plan_refsols/nations_sum_line_price.txt +++ b/tests/test_plan_refsols/nations_sum_line_price.txt @@ -1,5 +1,5 @@ 
ROOT(columns=[('okey', n_nationkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'l_extendedprice': t1.l_extendedprice}) diff --git a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt index add0e0f3c..58e419b53 100644 --- a/tests/test_plan_refsols/num_positive_accounts_per_nation.txt +++ b/tests/test_plan_refsols/num_positive_accounts_per_nation.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', n_name), ('suppliers_in_black', DEFAULT_TO(count_s_suppkey, 0:numeric)), ('total_suppliers', total_suppliers)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': 
t0.count_s_suppkey, 'n_name': t0.n_name, 'total_suppliers': t1.total_suppliers}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'count_s_suppkey': t1.count_s_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_suppkey': COUNT(s_suppkey)}) FILTER(condition=s_acctbal > 0.0:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/orders_sum_line_price.txt b/tests/test_plan_refsols/orders_sum_line_price.txt index e0220e6ff..0ffd28d94 100644 --- a/tests/test_plan_refsols/orders_sum_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', o_orderkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt index 455520f65..046adabcd 100644 --- a/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt +++ b/tests/test_plan_refsols/orders_sum_vs_count_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', 
o_orderkey), ('lavg', DEFAULT_TO(sum_l_extendedprice, 0:numeric) / count_l_extendedprice)], orderings=[]) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'count_l_extendedprice': t1.count_l_extendedprice, 'o_orderkey': t0.o_orderkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'count_l_extendedprice': COUNT(l_extendedprice), 'sum_l_extendedprice': SUM(l_extendedprice)}) SCAN(table=tpch.LINEITEM, columns={'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 915d68fb4..8ac11284d 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) diff --git a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt index be38b3dc5..16762bac8 100644 --- a/tests/test_plan_refsols/parts_quantity_increase_95_96.txt +++ b/tests/test_plan_refsols/parts_quantity_increase_95_96.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', p_name), ('qty_95', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('qty_96', DEFAULT_TO(agg_1, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) - DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (p_name):asc_first], limit=3:numeric) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t0.p_name, 'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_1': t1.sum_l_quantity, 'p_name': t0.p_name, 'sum_l_quantity': t0.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t0.p_name, 'p_partkey': 
t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, aggregations={'sum_l_quantity': SUM(l_quantity)}) diff --git a/tests/test_plan_refsols/quantile_function_test_2.txt b/tests/test_plan_refsols/quantile_function_test_2.txt index 163793d83..39d7a7218 100644 --- a/tests/test_plan_refsols/quantile_function_test_2.txt +++ b/tests/test_plan_refsols/quantile_function_test_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/quantile_function_test_3.txt b/tests/test_plan_refsols/quantile_function_test_3.txt index 163793d83..39d7a7218 100644 --- a/tests/test_plan_refsols/quantile_function_test_3.txt +++ b/tests/test_plan_refsols/quantile_function_test_3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git 
a/tests/test_plan_refsols/quantile_function_test_4.txt b/tests/test_plan_refsols/quantile_function_test_4.txt index b49c995d4..1fe7deeea 100644 --- a/tests/test_plan_refsols/quantile_function_test_4.txt +++ b/tests/test_plan_refsols/quantile_function_test_4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('orders_min', agg_8), ('orders_1_percent', agg_1), ('orders_10_percent', agg_0), ('orders_25_percent', agg_2), ('orders_median', agg_7), ('orders_75_percent', agg_3), ('orders_90_percent', agg_4), ('orders_99_percent', agg_5), ('orders_max', agg_6)], orderings=[(n_name):asc_first]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'agg_0': t1.agg_0, 'agg_1': t1.agg_1, 'agg_2': t1.agg_2, 'agg_3': t1.agg_3, 'agg_4': t1.agg_4, 'agg_5': t1.agg_5, 'agg_6': t1.agg_6, 'agg_7': t1.agg_7, 'agg_8': t1.agg_8, 'n_name': t0.n_name, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) LIMIT(limit=5:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}, orderings=[(n_name):asc_first]) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt index 610f76d3a..acb2cb036 100644 --- 
a/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt +++ b/tests/test_plan_refsols/rank_nations_per_region_by_customers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rank', RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first]))], orderings=[(RANKING(args=[], partition=[n_regionkey], order=[(n_rows):desc_first])):asc_first], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/region_acctbal_breakdown.txt b/tests/test_plan_refsols/region_acctbal_breakdown.txt index 88446e286..b6193e52f 100644 --- a/tests/test_plan_refsols/region_acctbal_breakdown.txt +++ b/tests/test_plan_refsols/region_acctbal_breakdown.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_red_acctbal', n_red_acctbal), ('n_black_acctbal', n_black_acctbal), ('median_red_acctbal', median_red_acctbal), ('median_black_acctbal', median_black_acctbal), ('median_overall_acctbal', median_overall_acctbal)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': 
t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'median_black_acctbal': t1.median_black_acctbal, 'median_overall_acctbal': t1.median_overall_acctbal, 'median_red_acctbal': t1.median_red_acctbal, 'n_black_acctbal': t1.n_black_acctbal, 'n_red_acctbal': t1.n_red_acctbal, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'median_black_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'median_overall_acctbal': MEDIAN(c_acctbal), 'median_red_acctbal': MEDIAN(KEEP_IF(c_acctbal, c_acctbal < 0:numeric)), 'n_black_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal >= 0:numeric)), 'n_red_acctbal': COUNT(KEEP_IF(c_acctbal, c_acctbal < 0:numeric))}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index 3a4c4a90a..b8c4cc603 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, 
aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/regional_first_order_best_line_part.txt b/tests/test_plan_refsols/regional_first_order_best_line_part.txt index 3f1e334ca..f3af15301 100644 --- a/tests/test_plan_refsols/regional_first_order_best_line_part.txt +++ b/tests/test_plan_refsols/regional_first_order_best_line_part.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('part_name', p_name)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(l_quantity):desc_first, (l_linenumber):asc_last], allow_ties=False) == 1:numeric, columns={'l_partkey': l_partkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index 93b25c240..6a369877b 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', s_name)], orderings=[]) FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) - 
JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/regions_sum_line_price.txt b/tests/test_plan_refsols/regions_sum_line_price.txt index 67a0c6401..c36ca2e5a 100644 --- a/tests/test_plan_refsols/regions_sum_line_price.txt +++ b/tests/test_plan_refsols/regions_sum_line_price.txt @@ -1,5 +1,5 @@ ROOT(columns=[('okey', r_regionkey), ('lsum', DEFAULT_TO(sum_l_extendedprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'r_regionkey': t0.r_regionkey, 'sum_l_extendedprice': t1.sum_l_extendedprice}) SCAN(table=tpch.REGION, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_l_extendedprice': SUM(l_extendedprice)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'n_regionkey': 
t0.n_regionkey}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 9d44d3f56..62c91f92a 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', avg_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t1.avg_p_retailprice, 'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'avg_p_retailprice': AVG(p_retailprice), 'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/simple_var_std.txt b/tests/test_plan_refsols/simple_var_std.txt index 505bad30b..a829a911a 100644 --- a/tests/test_plan_refsols/simple_var_std.txt +++ b/tests/test_plan_refsols/simple_var_std.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('var', pop_var), ('std', pop_std), ('sample_var', sample_var), ('sample_std', sample_std), ('pop_var', pop_var), ('pop_std', pop_std)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 
'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'pop_std': t1.pop_std, 'pop_var': t1.pop_var, 'sample_std': t1.sample_std, 'sample_var': t1.sample_var}) FILTER(condition=ISIN(n_name, ['ALGERIA', 'ARGENTINA']:array[unknown]), columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'pop_std': POPULATION_STD(s_acctbal), 'pop_var': POPULATION_VARIANCE(s_acctbal), 'sample_std': SAMPLE_STD(s_acctbal), 'sample_var': SAMPLE_VARIANCE(s_acctbal)}) diff --git a/tests/test_plan_refsols/singular1.txt b/tests/test_plan_refsols/singular1.txt index 665f6f40c..a33283726 100644 --- a/tests/test_plan_refsols/singular1.txt +++ b/tests/test_plan_refsols/singular1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('nation_4_name', n_name)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) FILTER(condition=n_nationkey == 4:numeric, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/singular2.txt b/tests/test_plan_refsols/singular2.txt index 294c7666f..0020aff21 100644 --- a/tests/test_plan_refsols/singular2.txt +++ b/tests/test_plan_refsols/singular2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', 
n_name), ('okey', o_orderkey)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey}) FILTER(condition=c_custkey == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index e3f6787ca..bac588a67 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index f8ce4600b..b73d59ebf 100644 --- a/tests/test_plan_refsols/singular4.txt +++ 
b/tests/test_plan_refsols/singular4.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index a547c658e..3e9681540 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', n_name)], orderings=[(l_receiptdate):asc_first, (c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'l_receiptdate': t1.l_receiptdate, 'n_name': t1.n_name}) FILTER(condition=c_nationkey == 4:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 9bf93569a..e49975afb 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -1,11 +1,11 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('n_orders', n_orders)], orderings=[(n_orders):desc_last, (s_name):asc_first], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_orders': t1.n_orders, 'p_name': t1.p_name, 's_name': t0.s_name}) FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) PROJECT(columns={'n_orders': DEFAULT_TO(n_rows, 0:numeric), 'p_name': p_name, 'ps_suppkey': ps_suppkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(n_rows, 0:numeric)):desc_first, (p_name):asc_last]) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t0.p_name, 'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt index 2eff260ee..e66ff4ccf 100644 --- a/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt +++ b/tests/test_plan_refsols/sqlite_udf_count_epsilon.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', r_name), ('n_cust', n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=EPSILON(c_acctbal, avg_balance, avg_balance * 0.1:numeric), columns={'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt index 3adfbe06d..2f501dbc7 100644 --- a/tests/test_plan_refsols/sqlite_udf_covar_pop.txt +++ b/tests/test_plan_refsols/sqlite_udf_covar_pop.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('cvp_ab_otp', ROUND(agg_0, 
3:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_0': t1.agg_0, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'agg_0': POPULATION_COVARIANCE(c_acctbal, o_totalprice / 1000000.0:numeric)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_regionkey': t0.n_regionkey, 'o_totalprice': t1.o_totalprice}) diff --git a/tests/test_plan_refsols/sqlite_udf_nested.txt b/tests/test_plan_refsols/sqlite_udf_nested.txt index ca252a32f..f2fcd5190 100644 --- a/tests/test_plan_refsols/sqlite_udf_nested.txt +++ b/tests/test_plan_refsols/sqlite_udf_nested.txt @@ -2,7 +2,7 @@ ROOT(columns=[('p', ROUND(percentage_expr_1, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'percentage_expr_1': PERCENTAGE(DECODE3(c_mktsegment, 'BUILDING':string, POSITIVE(c_acctbal), 'MACHINERY':string, EPSILON(c_acctbal, min_bal, 500:numeric), 'HOUSEHOLD':string, INTEGER(FORMAT_DATETIME('%j':string, min_o_orderdate)) == '366':string, False:bool))}) FILTER(condition=n_rows > 0:numeric, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': min_bal, 'min_o_orderdate': min_o_orderdate}) PROJECT(columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'min_bal': RELMIN(args=[c_acctbal], partition=[], order=[]), 'min_o_orderdate': min_o_orderdate, 'n_rows': n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': 
t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_mktsegment': t0.c_mktsegment, 'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'min_o_orderdate': MIN(o_orderdate), 'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt index b2444dc7c..117c0bfb9 100644 --- a/tests/test_plan_refsols/sqlite_udf_percent_positive.txt +++ b/tests/test_plan_refsols/sqlite_udf_percent_positive.txt @@ -1,6 +1,6 @@ ROOT(columns=[('name', r_name), ('pct_cust_positive', ROUND(percentage_expr_2, 2:numeric)), ('pct_supp_positive', ROUND(percentage_expr_3, 2:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'percentage_expr_2': t0.percentage_expr_2, 'percentage_expr_3': t1.percentage_expr_3, 'r_name': t0.r_name}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'percentage_expr_2': t1.percentage_expr_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'percentage_expr_2': PERCENTAGE(POSITIVE(c_acctbal))}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index b9e46e082..91077088f 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -1,12 +1,12 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('n_shipments', n_rows)], orderings=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_last, (s_name):asc_first], limit=3:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'p_name': t1.p_name, 's_name': t0.s_name, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) FILTER(condition=RANKING(args=[], partition=[ps_suppkey], 
order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'p_name': p_name, 'ps_suppkey': ps_suppkey, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t0.n_rows, 'p_name': t1.p_name, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t0.sum_l_quantity}) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows_1, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'sum_l_quantity': t1.sum_l_quantity}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows_1': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index f2a5c7f3c..cf94b18f6 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -1,5 +1,5 @@ ROOT(columns=[('supplier_name', s_name), ('nation_name', n_name), ('supplier_quantity', DEFAULT_TO(sum_l_quantity, 0:numeric)), ('national_qty_pct', 100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[]))], 
orderings=[(100.0:numeric * DEFAULT_TO(sum_l_quantity, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_l_quantity, 0:numeric)], partition=[s_nationkey], order=[])):desc_last], limit=5:numeric) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 'sum_l_quantity': t1.sum_l_quantity}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt index 00dcdb1fe..f879befc3 100644 --- a/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt +++ b/tests/test_plan_refsols/technograph_battery_failure_rates_anomalies.txt @@ -1,6 +1,6 @@ ROOT(columns=[('country_name', co_name), ('product_name', pr_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last, (pr_name):asc_first, (co_name):asc_first], limit=5:numeric) AGGREGATE(keys={'co_name': co_name, 'pr_name': pr_name}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'pr_name': t0.pr_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'de_id': t0.de_id, 'pr_name': t1.pr_name}) JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'de_id': t1.de_id, 'de_product_id': t1.de_product_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) diff --git a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt index 0a986eb55..526c2a80a 100644 --- a/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt +++ b/tests/test_plan_refsols/technograph_country_cartesian_oddball.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', co_name), ('n_other_countries', n_other_countries)], orderings=[(co_name):asc_first]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'co_name': t0.co_name, 'n_other_countries': t1.n_other_countries}) SCAN(table=main.COUNTRIES, columns={'co_name': co_name}) AGGREGATE(keys={}, aggregations={'n_other_countries': COUNT()}) SCAN(table=main.COUNTRIES, columns={}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index b9a9772c7..79a796b98 100644 --- 
a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,12 +1,12 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(1.0:numeric * DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) JOIN(condition=t0.co_id == t1.co_id & t0._id_1 == t1._id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'_id_3': _id_3, 'co_id': co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'n_rows': t1.n_rows}) JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) - 
JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) + JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id, 'de_purchase_country_id': de_purchase_country_id}) diff --git a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt index 1cf03558d..1bd54185e 100644 --- a/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt +++ b/tests/test_plan_refsols/technograph_country_incident_rate_analysis.txt @@ -1,20 +1,20 @@ ROOT(columns=[('country_name', co_name), ('made_ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / n_rows, 2:numeric)), ('sold_ir', ROUND(DEFAULT_TO(agg_14, 0:numeric) / agg_3, 2:numeric)), ('user_ir', ROUND(DEFAULT_TO(agg_8, 0:numeric) / DEFAULT_TO(agg_5, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': 
t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0.co_id == t1.us_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t0.agg_14, 'agg_3': t0.agg_3, 'agg_5': t1.n_rows, 'agg_8': t1.sum_n_rows, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_purchase_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_14': t1.sum_n_rows, 'agg_3': t1.n_rows, 'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t0.n_rows, 'sum_n_rows': t0.sum_n_rows}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_production_country_id': de_production_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'de_purchase_country_id': de_purchase_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, 
columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_purchase_country_id': t0.de_purchase_country_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_purchase_country_id': de_purchase_country_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) AGGREGATE(keys={'us_country_id': us_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'us_country_id': t0.us_country_id}) JOIN(condition=t0.us_id == t1.de_owner_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_id': t1.de_id, 'us_country_id': t0.us_country_id}) SCAN(table=main.USERS, columns={'us_country_id': us_country_id, 'us_id': us_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_owner_id': de_owner_id}) diff --git a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt index b66d05d00..a402bbd57 100644 --- a/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt +++ b/tests/test_plan_refsols/technograph_error_percentages_sun_set_by_error.txt @@ -1,8 +1,8 @@ ROOT(columns=[('error', er_name), ('pct', ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[(ROUND(100.0:numeric * DEFAULT_TO(n_rows, 0:numeric) / 
RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]), 2:numeric)):desc_last]) - JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.er_id == t1.in_error_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'n_rows': t1.n_rows}) SCAN(table=main.ERRORS, columns={'er_id': er_id, 'er_name': er_name}) AGGREGATE(keys={'in_error_id': in_error_id}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'in_error_id': t0.in_error_id}) + JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'in_error_id': t0.in_error_id}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_id': in_error_id}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) diff --git a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt index b2c4d2f9a..24ffa13a9 100644 --- a/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt +++ b/tests/test_plan_refsols/technograph_error_rate_sun_set_by_factory_country.txt @@ -1,9 +1,9 @@ ROOT(columns=[('country', co_name), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(co_name):asc_first]) - JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 
'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.co_id == t1.de_production_country_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) PROJECT(columns={'de_production_country_id': de_production_country_id, 'n_rows': n_rows, 'sum_n_incidents': DEFAULT_TO(sum_n_incidents, 0:numeric)}) AGGREGATE(keys={'de_production_country_id': de_production_country_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'de_production_country_id': t0.de_production_country_id, 'n_rows': t1.n_rows}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'de_production_country_id': t0.de_production_country_id}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id, 'de_production_country_id': de_production_country_id}) FILTER(condition=pr_name == 'Sun-Set':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt index 7af10fc70..e3bd2bb8e 100644 --- a/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt +++ b/tests/test_plan_refsols/technograph_incident_rate_per_brand.txt @@ -1,6 +1,6 @@ ROOT(columns=[('brand', pr_brand), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(pr_brand):asc_first]) AGGREGATE(keys={'pr_brand': pr_brand}, 
aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'de_id': t0.de_id, 'pr_brand': t1.pr_brand}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_most_unreliable_products.txt b/tests/test_plan_refsols/technograph_most_unreliable_products.txt index 7abb7f5b8..763603e0b 100644 --- a/tests/test_plan_refsols/technograph_most_unreliable_products.txt +++ b/tests/test_plan_refsols/technograph_most_unreliable_products.txt @@ -1,8 +1,8 @@ ROOT(columns=[('product', pr_name), ('product_brand', pr_brand), ('product_type', pr_type), ('ir', ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_incidents, 0:numeric) / n_rows, 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) + JOIN(condition=t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'pr_brand': t0.pr_brand, 'pr_name': t0.pr_name, 'pr_type': t0.pr_type, 'sum_n_incidents': t1.sum_n_incidents}) SCAN(table=main.PRODUCTS, columns={'pr_brand': pr_brand, 'pr_id': pr_id, 'pr_name': pr_name, 
'pr_type': pr_type}) AGGREGATE(keys={'de_product_id': de_product_id}, aggregations={'n_rows': COUNT(), 'sum_n_incidents': SUM(n_rows)}) - JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) + JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'de_product_id': t0.de_product_id, 'n_rows': t1.n_rows}) SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) AGGREGATE(keys={'in_device_id': in_device_id}, aggregations={'n_rows': COUNT()}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id}) diff --git a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt index 716f03473..e103270cd 100644 --- a/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt +++ b/tests/test_plan_refsols/top_5_nations_balance_by_num_suppliers.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('total_bal', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'ordering_0': t1.ordering_0, 'sum_s_acctbal': t1.sum_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git 
a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt index 5983915cd..c74bef837 100644 --- a/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt +++ b/tests/test_plan_refsols/top_5_nations_by_num_supplierss.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', n_nationkey), ('region_key', n_regionkey), ('name', n_name), ('comment', n_comment)], orderings=[(ordering_0):asc_last], limit=5:numeric) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_comment': t0.n_comment, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ordering_0': t1.ordering_0}) SCAN(table=tpch.NATION, columns={'n_comment': n_comment, 'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'ordering_0': COUNT(s_suppkey)}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/top_customers_by_orders.txt b/tests/test_plan_refsols/top_customers_by_orders.txt index f9ae80b9e..399f36eb7 100644 --- a/tests/test_plan_refsols/top_customers_by_orders.txt +++ b/tests/test_plan_refsols/top_customers_by_orders.txt @@ -1,5 +1,5 @@ ROOT(columns=[('customer_key', c_custkey), ('n_orders', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (c_custkey):asc_first], limit=5:numeric) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) + 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q10.txt b/tests/test_plan_refsols/tpch_q10.txt index 5bd99fb20..6614256b2 100644 --- a/tests/test_plan_refsols/tpch_q10.txt +++ b/tests/test_plan_refsols/tpch_q10.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_CUSTKEY', c_custkey), ('C_NAME', c_name), ('REVENUE', DEFAULT_TO(sum_expr_1, 0:numeric)), ('C_ACCTBAL', c_acctbal), ('N_NAME', n_name), ('C_ADDRESS', c_address), ('C_PHONE', c_phone), ('C_COMMENT', c_comment)], orderings=[(DEFAULT_TO(sum_expr_1, 0:numeric)):desc_last, (c_custkey):asc_first], limit=20:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_phone': t0.c_phone, 'n_name': t1.n_name, 'sum_expr_1': t0.sum_expr_1}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_address': t0.c_address, 'c_comment': t0.c_comment, 'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'c_phone': t0.c_phone, 'sum_expr_1': t1.sum_expr_1}) SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_address': c_address, 'c_comment': c_comment, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr_1': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'o_custkey': t0.o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q13.txt b/tests/test_plan_refsols/tpch_q13.txt index 2646b1149..430e21253 100644 --- a/tests/test_plan_refsols/tpch_q13.txt +++ b/tests/test_plan_refsols/tpch_q13.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_COUNT', num_non_special_orders), ('CUSTDIST', CUSTDIST)], orderings=[(CUSTDIST):desc_last, (num_non_special_orders):desc_last], limit=10:numeric) AGGREGATE(keys={'num_non_special_orders': DEFAULT_TO(n_rows, 0:numeric)}, aggregations={'CUSTDIST': COUNT()}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=NOT(LIKE(o_comment, '%special%requests%':string)), columns={'o_custkey': o_custkey}) diff --git a/tests/test_plan_refsols/tpch_q15.txt b/tests/test_plan_refsols/tpch_q15.txt index b7aa4880a..5c423740e 100644 --- a/tests/test_plan_refsols/tpch_q15.txt +++ b/tests/test_plan_refsols/tpch_q15.txt @@ -1,8 +1,8 @@ ROOT(columns=[('S_SUPPKEY', s_suppkey), ('S_NAME', s_name), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('TOTAL_REVENUE', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[(s_suppkey):asc_first]) - 
JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) + JOIN(condition=DEFAULT_TO(t1.sum_expr_3, 0:numeric) == t0.max_revenue & t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_phone': t0.s_phone, 's_suppkey': t0.s_suppkey, 'sum_expr_3': t1.sum_expr_3}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'max_revenue': t0.max_revenue, 's_address': t1.s_address, 's_name': t1.s_name, 's_phone': t1.s_phone, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'max_revenue': MAX(DEFAULT_TO(sum_expr_2, 0:numeric))}) - JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) + JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sum_expr_2': t1.sum_expr_2}) SCAN(table=tpch.SUPPLIER, columns={'s_suppkey': s_suppkey}) AGGREGATE(keys={'l_suppkey': l_suppkey}, aggregations={'sum_expr_2': SUM(l_extendedprice * 1:numeric - l_discount)}) FILTER(condition=l_shipdate < datetime.date(1996, 4, 1):datetime & l_shipdate >= datetime.date(1996, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q18.txt b/tests/test_plan_refsols/tpch_q18.txt index 9a3b55e20..6f14bbecb 100644 --- a/tests/test_plan_refsols/tpch_q18.txt +++ b/tests/test_plan_refsols/tpch_q18.txt @@ -1,6 +1,6 @@ ROOT(columns=[('C_NAME', c_name), ('C_CUSTKEY', c_custkey), ('O_ORDERKEY', o_orderkey), 
('O_ORDERDATE', o_orderdate), ('O_TOTALPRICE', o_totalprice), ('TOTAL_QUANTITY', DEFAULT_TO(sum_l_quantity, 0:numeric))], orderings=[(o_totalprice):desc_last, (o_orderdate):asc_first], limit=10:numeric) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) FILTER(condition=DEFAULT_TO(sum_l_quantity, 0:numeric) > 300:numeric, columns={'l_orderkey': l_orderkey, 'sum_l_quantity': sum_l_quantity}) diff --git a/tests/test_plan_refsols/tpch_q20.txt b/tests/test_plan_refsols/tpch_q20.txt index a09dd5caf..012ae3c85 100644 --- a/tests/test_plan_refsols/tpch_q20.txt +++ b/tests/test_plan_refsols/tpch_q20.txt @@ -1,5 +1,5 @@ ROOT(columns=[('S_NAME', s_name), 
('S_ADDRESS', s_address)], orderings=[(s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_address': t0.s_address, 's_name': t0.s_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_address': t0.s_address, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_address': s_address, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'CANADA':string, columns={'n_nationkey': n_nationkey}) @@ -7,7 +7,7 @@ ROOT(columns=[('S_NAME', s_name), ('S_ADDRESS', s_address)], orderings=[(s_name) AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={}) JOIN(condition=t0.ps_partkey == t1.p_partkey & t0.ps_availqty > 0.5:numeric * DEFAULT_TO(t1.sum_l_quantity, 0:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) + JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_partkey': t0.p_partkey, 'sum_l_quantity': t1.sum_l_quantity}) FILTER(condition=STARTSWITH(p_name, 'forest':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) AGGREGATE(keys={'l_partkey': l_partkey}, 
aggregations={'sum_l_quantity': SUM(l_quantity)}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index e15d1773c..52d68c1b0 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -1,5 +1,5 @@ ROOT(columns=[('S_NAME', s_name), ('NUMWAIT', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(DEFAULT_TO(n_rows, 0:numeric)):desc_last, (s_name):asc_first], limit=10:numeric) - JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) + JOIN(condition=t0.s_suppkey == t1.anything_l_suppkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'SAUDI ARABIA':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/tpch_q22.txt b/tests/test_plan_refsols/tpch_q22.txt index 5c7637dd0..fad4b1cdf 100644 --- a/tests/test_plan_refsols/tpch_q22.txt +++ b/tests/test_plan_refsols/tpch_q22.txt @@ -1,7 +1,7 @@ ROOT(columns=[('CNTRY_CODE', cntry_code), ('NUM_CUSTS', n_rows), ('TOTACCTBAL', DEFAULT_TO(sum_c_acctbal, 0:numeric))], orderings=[(cntry_code):asc_first]) AGGREGATE(keys={'cntry_code': SLICE(c_phone, None:unknown, 2:numeric, None:unknown)}, aggregations={'n_rows': COUNT(), 'sum_c_acctbal': SUM(c_acctbal)}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) == 0:numeric, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 
'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'c_phone': t0.c_phone, 'n_rows': t1.n_rows}) JOIN(condition=t1.c_acctbal > t0.global_avg_balance, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_phone': t1.c_phone}) AGGREGATE(keys={}, aggregations={'global_avg_balance': AVG(c_acctbal)}) FILTER(condition=c_acctbal > 0.0:numeric & ISIN(SLICE(c_phone, None:unknown, 2:numeric, None:unknown), ['13', '31', '23', '29', '30', '18', '17']:array[unknown]), columns={'c_acctbal': c_acctbal}) diff --git a/tests/test_plan_refsols/tpch_q8.txt b/tests/test_plan_refsols/tpch_q8.txt index f54d41956..c32a5dca3 100644 --- a/tests/test_plan_refsols/tpch_q8.txt +++ b/tests/test_plan_refsols/tpch_q8.txt @@ -2,7 +2,7 @@ ROOT(columns=[('O_YEAR', O_YEAR), ('MKT_SHARE', DEFAULT_TO(sum_brazil_volume, 0: AGGREGATE(keys={'O_YEAR': YEAR(o_orderdate)}, aggregations={'sum_brazil_volume': SUM(IFF(n_name == 'BRAZIL':string, l_extendedprice * 1:numeric - l_discount, 0:numeric)), 'sum_volume': SUM(l_extendedprice * 1:numeric - l_discount)}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t1.n_name, 'o_orderdate': t0.o_orderdate}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_suppkey': t0.l_suppkey, 'o_orderdate': t1.o_orderdate}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': 
t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) FILTER(condition=p_type == 'ECONOMY ANODIZED STEEL':string, columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/tpch_q9.txt b/tests/test_plan_refsols/tpch_q9.txt index d032c5bc1..1f5e1a933 100644 --- a/tests/test_plan_refsols/tpch_q9.txt +++ b/tests/test_plan_refsols/tpch_q9.txt @@ -3,7 +3,7 @@ ROOT(columns=[('NATION', n_name), ('O_YEAR', o_year), ('AMOUNT', DEFAULT_TO(sum_ JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_name': t0.n_name, 'o_orderdate': t0.o_orderdate, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_orderdate': t1.o_orderdate}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_name': t1.n_name}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) FILTER(condition=CONTAINS(p_name, 'green':string), columns={'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/various_aggfuncs_simple.txt b/tests/test_plan_refsols/various_aggfuncs_simple.txt index c98d2d967..c5ca7c2ea 100644 --- a/tests/test_plan_refsols/various_aggfuncs_simple.txt +++ b/tests/test_plan_refsols/various_aggfuncs_simple.txt @@ -1,5 +1,5 @@ ROOT(columns=[('nation_name', n_name), ('total_bal', sum_c_acctbal), ('num_bal', count_c_acctbal), ('avg_bal', DEFAULT_TO(avg_c_acctbal, 0:numeric)), ('min_bal', min_c_acctbal), ('max_bal', max_c_acctbal), ('num_cust', n_rows)], orderings=[]) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'avg_c_acctbal': t1.avg_c_acctbal, 'count_c_acctbal': t1.count_c_acctbal_1, 'max_c_acctbal': t1.max_c_acctbal_1, 'min_c_acctbal': t1.min_c_acctbal_1, 'n_name': t0.n_name, 'n_rows': t1.n_rows_1, 'sum_c_acctbal': t1.sum_c_acctbal_1}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'avg_c_acctbal': AVG(c_acctbal), 'count_c_acctbal_1': COUNT(c_acctbal), 'max_c_acctbal_1': MAX(c_acctbal), 'min_c_acctbal_1': MIN(c_acctbal), 'n_rows_1': COUNT(), 'sum_c_acctbal_1': SUM(c_acctbal)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_1.txt b/tests/test_plan_refsols/window_filter_order_1.txt index 3e326b484..8ea95d8cf 100644 --- a/tests/test_plan_refsols/window_filter_order_1.txt +++ b/tests/test_plan_refsols/window_filter_order_1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_2.txt b/tests/test_plan_refsols/window_filter_order_2.txt index 3e326b484..8ea95d8cf 100644 --- 
a/tests/test_plan_refsols/window_filter_order_2.txt +++ b/tests/test_plan_refsols/window_filter_order_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_3.txt b/tests/test_plan_refsols/window_filter_order_3.txt index 3e326b484..8ea95d8cf 100644 --- a/tests/test_plan_refsols/window_filter_order_3.txt +++ b/tests/test_plan_refsols/window_filter_order_3.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) < RELAVG(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & n_rows > 0:numeric, columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) 
SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_4.txt b/tests/test_plan_refsols/window_filter_order_4.txt index f4240ff57..788d47b44 100644 --- a/tests/test_plan_refsols/window_filter_order_4.txt +++ b/tests/test_plan_refsols/window_filter_order_4.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=n_rows < RELAVG(args=[n_rows], partition=[], order=[]), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_8.txt b/tests/test_plan_refsols/window_filter_order_8.txt index 30462c7cd..ee43f0aed 100644 --- a/tests/test_plan_refsols/window_filter_order_8.txt +++ b/tests/test_plan_refsols/window_filter_order_8.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n)], orderings=[]) AGGREGATE(keys={}, aggregations={'n': COUNT()}) FILTER(condition=c_acctbal < RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[], order=[]) & ABSENT(n_rows), columns={}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t0.c_acctbal, 'n_rows': t1.n_rows}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/window_filter_order_9.txt b/tests/test_plan_refsols/window_filter_order_9.txt index 642f47f0a..189b9c99b 100644 --- a/tests/test_plan_refsols/window_filter_order_9.txt +++ b/tests/test_plan_refsols/window_filter_order_9.txt @@ -5,7 +5,7 @@ ROOT(columns=[('n', n)], orderings=[]) FILTER(condition=o_clerk == 'Clerk#000000001':string, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) PROJECT(columns={'c_custkey': c_custkey, 'expr_0': 1:numeric, 'total_spent': DEFAULT_TO(sum_o_totalprice, 0:numeric)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_o_totalprice': t1.sum_o_totalprice}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment}) AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_sql_refsols/aggregation_functions_ansi.sql b/tests/test_sql_refsols/aggregation_functions_ansi.sql index 55964db76..51d482591 100644 
--- a/tests/test_sql_refsols/aggregation_functions_ansi.sql +++ b/tests/test_sql_refsols/aggregation_functions_ansi.sql @@ -19,29 +19,25 @@ WITH _s1 AS ( STDDEV(customer.c_acctbal) AS sample_std_c_acctbal, VARIANCE(customer.c_acctbal) AS sample_variance_c_acctbal, SUM(customer.c_acctbal) AS sum_c_acctbal, - SUM(_s1.n_rows) AS sum_n_rows, - customer.c_nationkey + SUM(_s1.n_rows) AS sum_n_rows FROM tpch.customer AS customer LEFT JOIN _s1 AS _s1 ON _s1.o_custkey = customer.c_custkey GROUP BY - 13 + customer.c_nationkey ) SELECT - COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, - _t1.avg_c_acctbal AS avg_value, - _t1.median_c_acctbal AS median_value, - _t1.min_c_acctbal AS min_value, - _t1.max_c_acctbal AS max_value, - _t1.agg_7 AS quantile_value, - _t1.anything_c_acctbal AS anything_value, - _t1.count_c_acctbal AS count_value, - _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value -FROM tpch.nation AS nation -JOIN _t1 AS _t1 - ON _t1.c_nationkey = nation.n_nationkey - AND ( - _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL - ) + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + median_c_acctbal AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + agg_7 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_variance_c_acctbal AS variance_value, + sample_std_c_acctbal AS stddev_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/aggregation_functions_mysql.sql b/tests/test_sql_refsols/aggregation_functions_mysql.sql index 9e2e68cef..85f2429d5 100644 --- a/tests/test_sql_refsols/aggregation_functions_mysql.sql +++ b/tests/test_sql_refsols/aggregation_functions_mysql.sql @@ -72,27 +72,23 @@ WITH _s1 AS ( COUNT(c_acctbal) - 1 ) AS sample_variance_c_acctbal, SUM(c_acctbal) AS sum_c_acctbal, - SUM(n_rows) 
AS sum_n_rows, - c_nationkey + SUM(n_rows) AS sum_n_rows FROM _t2 GROUP BY - 13 + c_nationkey ) SELECT - COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, - _t1.avg_c_acctbal AS avg_value, - _t1.avg_expr_15 AS median_value, - _t1.min_c_acctbal AS min_value, - _t1.max_c_acctbal AS max_value, - _t1.max_expr_16 AS quantile_value, - _t1.anything_c_acctbal AS anything_value, - _t1.count_c_acctbal AS count_value, - _t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value -FROM tpch.NATION AS NATION -JOIN _t1 AS _t1 - ON NATION.n_nationkey = _t1.c_nationkey - AND ( - _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL - ) + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + avg_expr_15 AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + max_expr_16 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_variance_c_acctbal AS variance_value, + sample_std_c_acctbal AS stddev_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/aggregation_functions_sqlite.sql b/tests/test_sql_refsols/aggregation_functions_sqlite.sql index b2f1e4afc..01f40f98a 100644 --- a/tests/test_sql_refsols/aggregation_functions_sqlite.sql +++ b/tests/test_sql_refsols/aggregation_functions_sqlite.sql @@ -69,27 +69,23 @@ WITH _s1 AS ( COUNT(c_acctbal) - 1 ) AS sample_variance_c_acctbal, SUM(c_acctbal) AS sum_c_acctbal, - SUM(n_rows) AS sum_n_rows, - c_nationkey + SUM(n_rows) AS sum_n_rows FROM _t2 GROUP BY - 13 + c_nationkey ) SELECT - COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, - _t1.avg_c_acctbal AS avg_value, - _t1.avg_expr_15 AS median_value, - _t1.min_c_acctbal AS min_value, - _t1.max_c_acctbal AS max_value, - _t1.max_expr_16 AS quantile_value, - _t1.anything_c_acctbal AS anything_value, - _t1.count_c_acctbal AS count_value, - 
_t1.ndistinct_c_acctbal AS count_distinct_value, - _t1.sample_variance_c_acctbal AS variance_value, - _t1.sample_std_c_acctbal AS stddev_value -FROM tpch.nation AS nation -JOIN _t1 AS _t1 - ON _t1.c_nationkey = nation.n_nationkey - AND ( - _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL - ) + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + avg_expr_15 AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + max_expr_16 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_variance_c_acctbal AS variance_value, + sample_std_c_acctbal AS stddev_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql index d078e262b..5f26d0a69 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_ansi.sql @@ -1,31 +1,12 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction - GROUP BY - 2, - 3 -), _s2 AS ( - SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid - GROUP BY - 2, - 3 -) SELECT sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 + sbticker.sbtickertype AS ticker_type, + COUNT(*) AS num_transactions +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_broker_basic4_mysql.sql 
b/tests/test_sql_refsols/defog_broker_basic4_mysql.sql index 1fcd150c0..d6e7fda23 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_mysql.sql @@ -1,31 +1,12 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS num_transactions, - sbtxcustid AS sbTxCustId, - sbtxtickerid AS sbTxTickerId - FROM main.sbTransaction - GROUP BY - 2, - 3 -), _s2 AS ( - SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbTicker.sbtickertype AS sbTickerType, - _s0.sbTxCustId - FROM _s0 AS _s0 - JOIN main.sbTicker AS sbTicker - ON _s0.sbTxTickerId = sbTicker.sbtickerid - GROUP BY - 2, - 3 -) SELECT sbCustomer.sbcuststate AS state, - _s2.sbTickerType AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 + sbTicker.sbtickertype AS ticker_type, + COUNT(*) AS num_transactions +FROM main.sbTransaction AS sbTransaction +JOIN main.sbTicker AS sbTicker + ON sbTicker.sbtickerid = sbTransaction.sbtxtickerid JOIN main.sbCustomer AS sbCustomer - ON _s2.sbTxCustId = sbCustomer.sbcustid + ON sbCustomer.sbcustid = sbTransaction.sbtxcustid GROUP BY 1, 2 diff --git a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql index d078e262b..5f26d0a69 100644 --- a/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_basic4_sqlite.sql @@ -1,31 +1,12 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS num_transactions, - sbtxcustid, - sbtxtickerid - FROM main.sbtransaction - GROUP BY - 2, - 3 -), _s2 AS ( - SELECT - SUM(_s0.num_transactions) AS num_transactions, - sbticker.sbtickertype, - _s0.sbtxcustid - FROM _s0 AS _s0 - JOIN main.sbticker AS sbticker - ON _s0.sbtxtickerid = sbticker.sbtickerid - GROUP BY - 2, - 3 -) SELECT sbcustomer.sbcuststate AS state, - _s2.sbtickertype AS ticker_type, - SUM(_s2.num_transactions) AS num_transactions -FROM _s2 AS _s2 + sbticker.sbtickertype AS ticker_type, + COUNT(*) AS num_transactions +FROM 
main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid JOIN main.sbcustomer AS sbcustomer - ON _s2.sbtxcustid = sbcustomer.sbcustid + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid GROUP BY 1, 2 From b4e3318c5f2183628c1be463e752bcc97a4ba9dc Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 8 Sep 2025 15:54:06 -0400 Subject: [PATCH 93/97] Minor revisions --- pydough/conversion/hybrid_tree.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index fa01d2b0c..db0d3cbf8 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -619,7 +619,7 @@ def infer_metadata_reverse_cardinality( ) -> JoinCardinality: """ Infers the cardinality of the reverse of a join from parent to child - based on the metadata from the parent->child relationship. + based on the metadata of the reverse-relationship, if one exists. Args: `metadata`: the metadata for the sub-collection property mapping @@ -629,7 +629,8 @@ def infer_metadata_reverse_cardinality( The join cardinality for the connection from the child back to the parent, if it can be inferred. Uses `PLURAL_FILTER` as a fallback. """ - # If there is no reverse, fall back to plural filter. + # If there is no reverse, fall back to plural filter (which is the + # safest default assumption). if ( not isinstance(metadata, ReversiblePropertyMetadata) or metadata.reverse is None @@ -639,13 +640,16 @@ def infer_metadata_reverse_cardinality( # If the reverse property exists, use its properties to # infer if the reverse cardinality is singular or plural # and whether a match always exists or not. 
- cardinality: JoinCardinality = ( - JoinCardinality.PLURAL_ACCESS - if metadata.reverse.is_plural - else JoinCardinality.SINGULAR_ACCESS - ) - if not metadata.reverse.always_matches: - cardinality = cardinality.add_filter() + cardinality: JoinCardinality + match (metadata.reverse.is_plural, metadata.reverse.always_matches): + case (False, True): + cardinality = JoinCardinality.SINGULAR_ACCESS + case (False, False): + cardinality = JoinCardinality.SINGULAR_FILTER + case (True, True): + cardinality = JoinCardinality.PLURAL_ACCESS + case (True, False): + cardinality = JoinCardinality.PLURAL_FILTER return cardinality def infer_root_reverse_cardinality(self) -> JoinCardinality: From 091a3538585a76dd5df7c3a61db46272d108bd13 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 10 Sep 2025 14:17:39 -0400 Subject: [PATCH 94/97] Adjusting edge case for correlation extraction affecting cardinality --- pydough/conversion/hybrid_correlation_extraction.py | 6 ++++++ tests/test_plan_refsols/correl_1.txt | 2 +- tests/test_plan_refsols/correl_33.txt | 2 +- tests/test_plan_refsols/correl_8.txt | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pydough/conversion/hybrid_correlation_extraction.py b/pydough/conversion/hybrid_correlation_extraction.py index 5a1ff44e3..d27a0e918 100644 --- a/pydough/conversion/hybrid_correlation_extraction.py +++ b/pydough/conversion/hybrid_correlation_extraction.py @@ -238,6 +238,9 @@ def attempt_correlation_extraction( for _, rhs_key in new_equi_filters: bottom_subtree.agg_keys.append(rhs_key) connection.always_exists = False + connection.reverse_cardinality = ( + connection.reverse_cardinality.add_filter() + ) if len(new_general_filters) > 0: if bottom_subtree.general_join_condition is not None: @@ -262,6 +265,9 @@ def attempt_correlation_extraction( pydop.BAN, new_general_filters, BooleanType() ) connection.always_exists = False + connection.reverse_cardinality = ( + connection.reverse_cardinality.add_filter() + ) # Update the 
filter condition with the new conjunction of terms if new_conjunction != conjunction: diff --git a/tests/test_plan_refsols/correl_1.txt b/tests/test_plan_refsols/correl_1.txt index 0c3125585..352d9d69f 100644 --- a/tests/test_plan_refsols/correl_1.txt +++ b/tests/test_plan_refsols/correl_1.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('n_prefix_nations', DEFAULT_TO(n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) + JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == t1.expr_1 & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'expr_1': SLICE(n_name, None:unknown, 1:numeric, None:unknown), 'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_33.txt b/tests/test_plan_refsols/correl_33.txt index 57219f30d..614d9cd79 100644 --- a/tests/test_plan_refsols/correl_33.txt +++ b/tests/test_plan_refsols/correl_33.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) - JOIN(condition=MONTH(t0.min_o_orderdate) == t1.month_o_orderdate & YEAR(t0.min_o_orderdate) == t1.year_o_orderdate, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_rows': t1.n_rows}) + JOIN(condition=MONTH(t0.min_o_orderdate) == t1.month_o_orderdate & YEAR(t0.min_o_orderdate) == t1.year_o_orderdate, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_rows': t1.n_rows}) AGGREGATE(keys={}, 
aggregations={'min_o_orderdate': MIN(o_orderdate)}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) diff --git a/tests/test_plan_refsols/correl_8.txt b/tests/test_plan_refsols/correl_8.txt index 3f1326205..ed1c5cfe8 100644 --- a/tests/test_plan_refsols/correl_8.txt +++ b/tests/test_plan_refsols/correl_8.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', n_name), ('rname', r_name)], orderings=[(n_name):asc_first]) - JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) + JOIN(condition=SLICE(t0.n_name, None:unknown, 1:numeric, None:unknown) == t1.expr_0 & t0.n_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'r_name': t1.r_name}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_regionkey': n_regionkey}) PROJECT(columns={'expr_0': SLICE(r_name, None:unknown, 1:numeric, None:unknown), 'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) From d903c9d9043bd123f71244dbe6a41ae7de514817 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 17 Sep 2025 15:14:58 -0400 Subject: [PATCH 95/97] temporary reversion as setup is being adjusted --- pydough/conversion/join_agg_transpose.py | 182 ++++++++---------- .../aggregation_analytics_2.txt | 8 +- .../aggregation_analytics_3.txt | 8 +- tests/test_plan_refsols/correl_30.txt | 26 +-- .../multi_partition_access_3.txt | 14 +- .../multi_partition_access_4.txt | 12 +- tests/test_sql_refsols/correl_30_sqlite.sql | 5 +- .../defog_broker_adv5_ansi.sql | 57 +++--- .../defog_broker_adv5_mysql.sql | 46 ++--- .../defog_broker_adv5_postgres.sql | 46 ++--- 
.../defog_broker_adv5_snowflake.sql | 46 ++--- .../defog_broker_adv5_sqlite.sql | 50 ++--- .../defog_dealership_adv11_ansi.sql | 22 +-- .../defog_dealership_adv11_mysql.sql | 22 +-- .../defog_dealership_adv11_postgres.sql | 22 +-- .../defog_dealership_adv11_snowflake.sql | 22 +-- .../defog_dealership_adv11_sqlite.sql | 22 +-- .../defog_dealership_gen4_ansi.sql | 30 +-- .../defog_dealership_gen4_mysql.sql | 32 +-- .../defog_dealership_gen4_postgres.sql | 30 +-- .../defog_dealership_gen4_snowflake.sql | 30 +-- .../defog_dealership_gen4_sqlite.sql | 32 +-- ...aph_incident_rate_by_release_year_ansi.sql | 9 - ...h_incident_rate_by_release_year_sqlite.sql | 9 - 24 files changed, 369 insertions(+), 413 deletions(-) diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py index d16fec832..6f32751b1 100644 --- a/pydough/conversion/join_agg_transpose.py +++ b/pydough/conversion/join_agg_transpose.py @@ -12,15 +12,13 @@ ColumnReference, ColumnReferenceFinder, Join, + JoinCardinality, JoinType, RelationalExpression, RelationalNode, RelationalRoot, RelationalShuttle, ) -from pydough.relational.rel_util import ( - add_input_name, -) class JoinAggregateTransposeShuttle(RelationalShuttle): @@ -35,10 +33,25 @@ def reset(self): self.finder.reset() def visit_join(self, node: Join) -> RelationalNode: + result: RelationalNode | None = None + + # Attempt the transpose where the left input is an Aggregate. If it + # succeeded, use that as the result and recursively transform its + # inputs. if isinstance(node.inputs[0], Aggregate): - return self.generic_visit_inputs( - self.join_aggregate_transpose(node, node.inputs[0]) - ) + result = self.join_aggregate_transpose(node, node.inputs[0], True) + if result is not None: + return self.generic_visit_inputs(result) + + # If the attempt failed, then attempt the transpose where the right + # input is an Aggregate. If this attempt succeeded, use that as the + # result and recursively transform its inputs. 
+ if isinstance(node.inputs[1], Aggregate): + result = self.join_aggregate_transpose(node, node.inputs[1], False) + if result is not None: + return self.generic_visit_inputs(result) + + # If this attempt failed, fall back to the regular implementation. return super().visit_join(node) def generate_name(self, base: str, used_names: Iterable[str]) -> str: @@ -57,19 +70,22 @@ def generate_name(self, base: str, used_names: Iterable[str]) -> str: i += 1 def join_aggregate_transpose( - self, join: Join, aggregate: Aggregate - ) -> RelationalNode: + self, join: Join, aggregate: Aggregate, is_left: bool + ) -> RelationalNode | None: """ Transposes a Join above an Aggregate into an Aggregate above a Join, - when possible. + when possible and it would be better for performance to use the join + first to filter some of the rows before aggregating. Args: `join`: the Join node above the Aggregate. `aggregate`: the Aggregate node that is the left input to the Join. + `is_left`: whether the Aggregate is the left input to the Join + (True) or the right input (False). Returns: - The new RelationalNode tree with the Join and Aggregate transposed, or - the original Join if the transpose is not possible. + The new RelationalNode tree with the Join and Aggregate transposed, + or None if the transpose is not possible. """ # Verify that the join is an inner, left, or semi-join, and that the # join cardinality is singular (unless the aggregations are not affected @@ -78,115 +94,79 @@ def join_aggregate_transpose( call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT) for call in aggregate.aggregations.values() ) + + # The cardinality with regards to the input being considered must be + # singular (unless the aggregations allow plural), and must be + # filtering (since the point of joining before aggregation is to reduce + # the number of rows to aggregate). 
+ cardinality: JoinCardinality = ( + join.cardinality if is_left else join.reverse_cardinality + ) + + # Verify the cardinality meets the specified criteria, and that the join + # type is INNER/SEMI (since LEFT would not be filtering), where SEMI is + # only allowed if the aggregation is on the left. if not ( - join.join_type in (JoinType.INNER, JoinType.SEMI) - and (join.cardinality.singular or aggs_allow_plural) + ( + (join.join_type == JoinType.INNER) + or (join.join_type == JoinType.SEMI and is_left) + ) + and cardinality.filters + and (cardinality.singular or aggs_allow_plural) ): - return join + return None + + # The alias of the input to the join that corresponds to the + # aggregate. + desired_alias: str | None = ( + join.default_input_aliases[0] if is_left else join.default_input_aliases[1] + ) # Find all of the columns used in the join condition that come from the - # left-hand side of the join. + # aggregate side of the join self.finder.reset() join.condition.accept(self.finder) - lhs_condition_columns: set[ColumnReference] = { + agg_condition_columns: set[ColumnReference] = { col for col in self.finder.get_column_references() - if col.input_name == join.default_input_aliases[0] + if col.input_name == desired_alias } - # Verify that there is at least one left hand side condition column, - # and all of them are grouping keys in the aggregate. - if len(lhs_condition_columns) == 0 or any( - col.name not in aggregate.keys for col in lhs_condition_columns + # Verify ALL of the condition columns from that side of the join are + # in the aggregate keys. 
+ if len(agg_condition_columns) == 0 or any( + col.name not in aggregate.keys for col in agg_condition_columns ): - return join - - reverse_join_columns: dict[str, RelationalExpression] = {} - for join_col_name, join_col_expr in join.columns.items(): - assert isinstance(join_col_expr, ColumnReference) - reverse_join_columns[join_col_expr.name] = ColumnReference( - join_col_name, join_col_expr.data_type - ) + return None new_join_columns: dict[str, RelationalExpression] = {} - new_key_columns: dict[str, RelationalExpression] = {} - new_aggregate_columns: dict[str, CallExpression] = {} - used_column_names: set[str] = set() - - for col_name, col_expr in join.columns.items(): - self.finder.reset() - col_expr.accept(self.finder) - if all( - expr.input_name == join.default_input_aliases[1] - for expr in self.finder.get_column_references() - ): - new_join_columns[col_name] = col_expr - new_aggregate_columns[col_name] = CallExpression( - pydop.ANYTHING, - col_expr.data_type, - [ColumnReference(col_name, col_expr.data_type)], - ) - used_column_names.add(col_name) - elif not ( - isinstance(col_expr, ColumnReference) - and col_expr.input_name == join.default_input_aliases[0] - ): - return join - - for key_name, key_expr in aggregate.keys.items(): - new_join_columns[key_name] = add_input_name( - key_expr, join.default_input_aliases[0] - ) - agg_key_name: str = self.generate_name(key_name, used_column_names) - new_key_columns[agg_key_name] = ColumnReference( - key_name, col_expr.data_type - ) - used_column_names.add(agg_key_name) - - for agg_name, agg_expr in aggregate.aggregations.items(): - new_inputs: list[RelationalExpression] = [] - for input_expr in agg_expr.inputs: - join_name: str - if isinstance(input_expr, ColumnReference): - join_name = self.generate_name(input_expr.name, new_join_columns) - else: - join_name = self.generate_name("expr", new_join_columns) - new_join_columns[join_name] = add_input_name( - input_expr, join.default_input_aliases[0] - ) - 
new_inputs.append(ColumnReference(join_name, input_expr.data_type)) - agg_name = self.generate_name(agg_name, used_column_names) - if new_inputs != agg_expr.inputs: - agg_expr = CallExpression( - agg_expr.op, - agg_expr.data_type, - new_inputs, - ) - new_aggregate_columns[agg_name] = agg_expr - used_column_names.add(agg_name) + new_aggregate_aggs: dict[str, CallExpression] = {} + new_aggregate_keys: dict[str, RelationalExpression] = {} + + new_condition: RelationalExpression = join.condition + agg_input: RelationalNode = aggregate.inputs[0] + non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0] + new_join_inputs: list[RelationalNode] = ( + [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] + ) + + # TODO: FINISH THIS + return None new_join: Join = Join( - inputs=[aggregate.inputs[0], join.inputs[1]], - condition=join.condition, - columns=new_join_columns, - join_type=join.join_type, - cardinality=join.cardinality, + new_join_inputs, + new_condition, + join.join_type, + new_join_columns, + join.cardinality, + join.reverse_cardinality, + join.correl_name, ) - new_aggregate = Aggregate( - input=new_join, keys=new_key_columns, aggregations=new_aggregate_columns + new_aggregate: Aggregate = Aggregate( + new_join, new_aggregate_keys, new_aggregate_aggs ) - # print() - # print(join.to_tree_string()) - # print(lhs_condition_columns) - # print(new_join_columns) - # print(new_key_columns) - # print(new_aggregate_columns) - # print(new_aggregate.to_tree_string()) - # breakpoint() - # return join - return new_aggregate diff --git a/tests/test_plan_refsols/aggregation_analytics_2.txt b/tests/test_plan_refsols/aggregation_analytics_2.txt index c605d16b7..b719a64e1 100644 --- a/tests/test_plan_refsols/aggregation_analytics_2.txt +++ b/tests/test_plan_refsols/aggregation_analytics_2.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('part_name', anything_p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (anything_p_name):asc_first], limit=4:numeric) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'l_tax': t0.l_tax, 'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) +ROOT(columns=[('part_name', p_name), ('revenue_generated', ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=4:numeric) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', anything_p_name), ('revenue_generated', ROUND(DEFAUL SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=ISIN(YEAR(l_shipdate), [1995, 1996]:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/aggregation_analytics_3.txt b/tests/test_plan_refsols/aggregation_analytics_3.txt index 3740c9c7d..f29c91858 100644 --- a/tests/test_plan_refsols/aggregation_analytics_3.txt +++ b/tests/test_plan_refsols/aggregation_analytics_3.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('part_name', anything_p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (anything_p_name):asc_first], limit=3:numeric) - AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) - JOIN(condition=t0.anything_ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': 
t0.l_quantity, 'l_tax': t0.l_tax, 'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) +ROOT(columns=[('part_name', p_name), ('revenue_ratio', ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_revenue, 0:numeric) / DEFAULT_TO(sum_l_quantity, 0:numeric), 2:numeric)):asc_first, (p_name):asc_first], limit=3:numeric) + JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity, 'sum_revenue': t0.sum_revenue}) + AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'sum_l_quantity': SUM(l_quantity), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount * 1:numeric - l_tax - l_quantity * ps_supplycost)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'l_tax': t1.l_tax, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t0.ps_supplycost}) @@ -11,4 +11,4 @@ ROOT(columns=[('part_name', anything_p_name), ('revenue_ratio', ROUND(DEFAULT_TO SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric, columns={'l_discount': 
l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey, 'l_tax': l_tax}) - SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) + SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/correl_30.txt b/tests/test_plan_refsols/correl_30.txt index f311b3bfb..0912d4959 100644 --- a/tests/test_plan_refsols/correl_30.txt +++ b/tests/test_plan_refsols/correl_30.txt @@ -1,6 +1,6 @@ -ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', anything_n_rows)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'anything_n_rows': ANYTHING(n_rows), 'n_rows': COUNT()}) - JOIN(condition=t0.anything_n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'r_name': t0.r_name}) +ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n_name), ('n_above_avg_customers', n_rows), ('n_above_avg_suppliers', n_above_avg_suppliers)], orderings=[(anything_lower_r_name):asc_first, (anything_n_name):asc_first]) + JOIN(condition=t0.n_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_lower_r_name': t0.anything_lower_r_name, 'anything_n_name': t0.anything_n_name, 'n_above_avg_suppliers': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, 
aggregations={'anything_lower_r_name': ANYTHING(LOWER(r_name)), 'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_acctbal > t0.avg_c_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_c_acctbal': t0.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t1.r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_c_acctbal': t1.avg_c_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) @@ -10,13 +10,13 @@ ROOT(columns=[('region_name', anything_lower_r_name), ('nation_name', anything_n FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) - 
SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) - FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey & t1.s_acctbal > t0.avg_s_acctbal, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'avg_s_acctbal': AVG(s_acctbal)}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) + FILTER(condition=NOT(ISIN(r_name, ['MIDDLE EAST', 'AFRICA', 'ASIA']:array[unknown])), columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index 
b6041588b..3203eb669 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -1,12 +1,12 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbTickerSymbol):asc_first]) - JOIN(condition=t1.sbDpClose < t0.max_anything_sbDpClose & t0.anything_sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) - AGGREGATE(keys={'anything_sbTickerType': anything_sbTickerType}, aggregations={'max_anything_sbDpClose': MAX(anything_sbDpClose)}) - AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'anything_sbDpClose': ANYTHING(sbDpClose), 'anything_sbTickerType': ANYTHING(sbTickerType)}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t0.sbDpTickerId, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t1.sbDpClose < t0.max_sbDpClose & t0.sbTickerType == t1.sbTickerType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol}) + AGGREGATE(keys={'sbTickerType': sbTickerType}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) + JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerType': t1.sbTickerType}) + AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={}) SCAN(table=main.sbDailyPrice, columns={'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) - SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': 
sbTickerType}) - SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) + JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) + SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) + SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.max_sbDpClose, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) diff --git a/tests/test_plan_refsols/multi_partition_access_4.txt b/tests/test_plan_refsols/multi_partition_access_4.txt index 9141bb5ec..f7be3c1ff 100644 --- a/tests/test_plan_refsols/multi_partition_access_4.txt +++ b/tests/test_plan_refsols/multi_partition_access_4.txt @@ -1,8 +1,8 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId_0 == t1.sbTxCustId & t0.anything_sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.anything_max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) - AGGREGATE(keys={'sbTxCustId_0': sbTxCustId}, aggregations={'anything_max_sbTxShares': ANYTHING(max_sbTxShares), 'anything_sbTxTickerId': ANYTHING(sbTxTickerId)}) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, columns={'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sbTxTickerId': 
t1.sbTxTickerId}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId}) - AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) - SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) + JOIN(condition=t1.sbTxShares < t0.max_max_sbTxShares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t1.sbTxShares >= t0.max_sbTxShares, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'max_max_sbTxShares': t0.max_sbTxShares, 'max_sbTxShares': t1.max_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares}) + AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxTickerId': sbTxTickerId}, aggregations={'max_sbTxShares': MAX(sbTxShares)}) + SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) SCAN(table=main.sbTransaction, columns={'sbTxCustId': sbTxCustId, 'sbTxId': sbTxId, 'sbTxShares': sbTxShares, 'sbTxTickerId': sbTxTickerId}) diff --git a/tests/test_sql_refsols/correl_30_sqlite.sql b/tests/test_sql_refsols/correl_30_sqlite.sql index dac0716dc..e0e7d6846 100644 --- a/tests/test_sql_refsols/correl_30_sqlite.sql +++ b/tests/test_sql_refsols/correl_30_sqlite.sql @@ -65,10 +65,7 @@ SELECT _s13.n_rows AS n_above_avg_suppliers FROM _s12 AS _s12 JOIN _s13 AS _s13 - ON _s13.n_nationkey = anything_n_nationkey -GROUP BY - nation.n_nationkey, - n_nationkey + ON _s12.n_nationkey = _s13.n_nationkey ORDER BY 1, 2 diff --git 
a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql index d8cd77ce8..e019f8e82 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_ansi.sql @@ -1,45 +1,42 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), + EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), CASE - WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))) >= 2 - THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME)), 1, 2) - ELSE SUBSTRING( - CONCAT('00', EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS DATETIME))), - ( - 2 * -1 - ) - ) + WHEN LENGTH(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))) >= 2 + THEN SUBSTRING(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 1, 2) + ELSE SUBSTRING(CONCAT('00', EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME))), ( + 2 * -1 + )) END ) AS month, - ANY_VALUE(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + 
SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -47,6 +44,6 @@ SELECT ( ( sum_sum_sbdpclose / sum_count_sbdpclose - ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month NULLS LAST) - ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month NULLS LAST) AS momc + ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month NULLS LAST) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_mysql.sql b/tests/test_sql_refsols/defog_broker_adv5_mysql.sql index fb9e56e78..6a857153f 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_mysql.sql @@ -1,36 +1,36 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbDailyPrice.sbdpdate AS DATETIME)), - LPAD(EXTRACT(MONTH FROM CAST(sbDailyPrice.sbdpdate AS DATETIME)), 2, '0') + EXTRACT(YEAR FROM CAST(sbdpdate AS DATETIME)), + LPAD(EXTRACT(MONTH FROM CAST(sbdpdate AS DATETIME)), 2, '0') ) AS month, - ANY_VALUE(sbTicker.sbtickersymbol) AS anything_sbTickerSymbol, - COUNT(sbDailyPrice.sbdpclose) AS count_sbDpClose, - MAX(sbDailyPrice.sbdphigh) AS max_sbDpHigh, - MIN(sbDailyPrice.sbdplow) AS min_sbDpLow, - SUM(sbDailyPrice.sbdpclose) AS sum_sbDpClose - FROM main.sbDailyPrice AS sbDailyPrice - JOIN main.sbTicker AS sbTicker - ON sbDailyPrice.sbdptickerid = sbTicker.sbtickerid + sbdptickerid AS sbDpTickerId, + COUNT(sbdpclose) AS count_sbDpClose, + MAX(sbdphigh) AS max_sbDpHigh, + MIN(sbdplow) AS 
min_sbDpLow, + SUM(sbdpclose) AS sum_sbDpClose + FROM main.sbDailyPrice GROUP BY - sbDailyPrice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbTickerSymbol, - month, - MAX(max_sbDpHigh) AS max_max_sbDpHigh, - MIN(min_sbDpLow) AS min_min_sbDpLow, - SUM(count_sbDpClose) AS sum_count_sbDpClose, - SUM(sum_sbDpClose) AS sum_sum_sbDpClose - FROM _t1 + _s0.month, + sbTicker.sbtickersymbol AS sbTickerSymbol, + MAX(_s0.max_sbDpHigh) AS max_max_sbDpHigh, + MIN(_s0.min_sbDpLow) AS min_min_sbDpLow, + SUM(_s0.count_sbDpClose) AS sum_count_sbDpClose, + SUM(_s0.sum_sbDpClose) AS sum_sum_sbDpClose + FROM _s0 AS _s0 + JOIN main.sbTicker AS sbTicker + ON _s0.sbDpTickerId = sbTicker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbTickerSymbol AS symbol, + sbTickerSymbol AS symbol, month, sum_sum_sbDpClose / sum_count_sbDpClose AS avg_close, max_max_sbDpHigh AS max_high, @@ -38,6 +38,6 @@ SELECT ( ( sum_sum_sbDpClose / sum_count_sbDpClose - ) - LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY anything_sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) - ) / LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY anything_sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) AS momc + ) - LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) + ) / LAG(sum_sum_sbDpClose / sum_count_sbDpClose, 1) OVER (PARTITION BY sbTickerSymbol ORDER BY CASE WHEN month COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, month COLLATE utf8mb4_bin) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_postgres.sql b/tests/test_sql_refsols/defog_broker_adv5_postgres.sql index e206b89f7..738776d67 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_postgres.sql +++ 
b/tests/test_sql_refsols/defog_broker_adv5_postgres.sql @@ -1,36 +1,36 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - EXTRACT(YEAR FROM CAST(sbdailyprice.sbdpdate AS TIMESTAMP)), - LPAD(CAST(EXTRACT(MONTH FROM CAST(sbdailyprice.sbdpdate AS TIMESTAMP)) AS TEXT), 2, '0') + EXTRACT(YEAR FROM CAST(sbdpdate AS TIMESTAMP)), + LPAD(CAST(EXTRACT(MONTH FROM CAST(sbdpdate AS TIMESTAMP)) AS TEXT), 2, '0') ) AS month, - MAX(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -38,6 +38,6 @@ SELECT CAST(( ( CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose - ) - LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY 
anything_sbtickersymbol ORDER BY month) - ) AS DOUBLE PRECISION) / LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) AS momc + ) - LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS DOUBLE PRECISION) / LAG(CAST(sum_sum_sbdpclose AS DOUBLE PRECISION) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql index 29611e6f5..f37e844f0 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql @@ -1,36 +1,36 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - YEAR(CAST(sbdailyprice.sbdpdate AS TIMESTAMP)), - LPAD(MONTH(CAST(sbdailyprice.sbdpdate AS TIMESTAMP)), 2, '0') + YEAR(CAST(sbdpdate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbdpdate AS TIMESTAMP)), 2, '0') ) AS month, - ANY_VALUE(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS 
max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -38,6 +38,6 @@ SELECT ( ( sum_sum_sbdpclose / sum_count_sbdpclose - ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) - ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) AS momc + ) - LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql index 76d37eb0c..57499068b 100644 --- a/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv5_sqlite.sql @@ -1,42 +1,42 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT CONCAT_WS( '-', - CAST(STRFTIME('%Y', sbdailyprice.sbdpdate) AS INTEGER), + CAST(STRFTIME('%Y', sbdpdate) AS INTEGER), CASE - WHEN LENGTH(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER)) >= 2 - THEN SUBSTRING(CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), 1, 2) - ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdailyprice.sbdpdate) AS INTEGER), ( + WHEN LENGTH(CAST(STRFTIME('%m', sbdpdate) AS INTEGER)) >= 2 + THEN SUBSTRING(CAST(STRFTIME('%m', sbdpdate) AS INTEGER), 1, 2) + ELSE SUBSTRING('00' || CAST(STRFTIME('%m', sbdpdate) AS INTEGER), ( 2 * -1 )) END ) AS month, - MAX(sbticker.sbtickersymbol) AS anything_sbtickersymbol, - COUNT(sbdailyprice.sbdpclose) AS count_sbdpclose, - 
MAX(sbdailyprice.sbdphigh) AS max_sbdphigh, - MIN(sbdailyprice.sbdplow) AS min_sbdplow, - SUM(sbdailyprice.sbdpclose) AS sum_sbdpclose - FROM main.sbdailyprice AS sbdailyprice - JOIN main.sbticker AS sbticker - ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice GROUP BY - sbdailyprice.sbdptickerid, - 1 + 1, + 2 ), _t0 AS ( SELECT - anything_sbtickersymbol, - month, - MAX(max_sbdphigh) AS max_max_sbdphigh, - MIN(min_sbdplow) AS min_min_sbdplow, - SUM(count_sbdpclose) AS sum_count_sbdpclose, - SUM(sum_sbdpclose) AS sum_sum_sbdpclose - FROM _t1 + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid GROUP BY 1, 2 ) SELECT - anything_sbtickersymbol AS symbol, + sbtickersymbol AS symbol, month, CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose AS avg_close, max_max_sbdphigh AS max_high, @@ -44,6 +44,6 @@ SELECT CAST(( ( CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose - ) - LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) - ) AS REAL) / LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY anything_sbtickersymbol ORDER BY month) AS momc + ) - LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS REAL) / LAG(CAST(sum_sum_sbdpclose AS REAL) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc FROM _t0 diff --git a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql index 
fd35dfa0a..fa322a6fc 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_ansi.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - ANY_VALUE(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( ( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql b/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql index fd35dfa0a..fa322a6fc 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_mysql.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - ANY_VALUE(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( ( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git 
a/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql b/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql index ae777ae80..c923f25aa 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_postgres.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - MAX(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS TIMESTAMP)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( CAST(( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) AS DOUBLE PRECISION) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) AS DOUBLE PRECISION) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql index 56a1c04c0..22a6080ca 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - ANY_VALUE(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - YEAR(CAST(sales.sale_date AS TIMESTAMP)) = 2023 + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( ( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / 
COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql index f515a9cbb..8309fef26 100644 --- a/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_adv11_sqlite.sql @@ -1,19 +1,19 @@ -WITH _t1 AS ( +WITH _s0 AS ( SELECT - MAX(cars.cost) AS anything_cost, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.cars AS cars - ON cars._id = sales.car_id + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 + CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 GROUP BY - sales.car_id + 1 ) SELECT ( CAST(( - COALESCE(SUM(sum_sale_price), 0) - COALESCE(SUM(anything_cost), 0) - ) AS REAL) / COALESCE(SUM(anything_cost), 0) + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) AS REAL) / COALESCE(SUM(cars.cost), 0) ) * 100 AS GPM -FROM _t1 +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql index 3e9cb8d22..5acfd1845 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_ansi.sql @@ -1,29 +1,29 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter, - ANY_VALUE(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM 
CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql b/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql index 4c6e93a0b..7233adba8 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_mysql.sql @@ -1,37 +1,37 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT STR_TO_DATE( CONCAT( - YEAR(CAST(sales.sale_date AS DATETIME)), + YEAR(CAST(sale_date AS DATETIME)), ' ', - QUARTER(CAST(sales.sale_date AS DATETIME)) * 3 - 2, + QUARTER(CAST(sale_date AS DATETIME)) * 3 - 2, ' 1' ), '%Y %c %e' ) AS quarter, - ANY_VALUE(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS DATETIME)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS DATETIME)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state COLLATE utf8mb4_bin AS customer_state, + state COLLATE utf8mb4_bin AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git 
a/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql b/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql index 7b48e2ed5..858012735 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_postgres.sql @@ -1,29 +1,29 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter, - MAX(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - EXTRACT(YEAR FROM CAST(sales.sale_date AS TIMESTAMP)) = 2023 + EXTRACT(YEAR FROM CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql index 2c0414fc9..f9b3c5bb4 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql @@ -1,29 +1,29 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT - DATE_TRUNC('QUARTER', CAST(sales.sale_date AS TIMESTAMP)) AS quarter, - ANY_VALUE(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + 
customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - YEAR(CAST(sales.sale_date AS TIMESTAMP)) = 2023 + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql index 669018a02..9a3119405 100644 --- a/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql +++ b/tests/test_sql_refsols/defog_dealership_gen4_sqlite.sql @@ -1,37 +1,37 @@ -WITH _t2 AS ( +WITH _s0 AS ( SELECT DATE( - sales.sale_date, + sale_date, 'start of month', '-' || CAST(( ( - CAST(STRFTIME('%m', DATETIME(sales.sale_date)) AS INTEGER) - 1 + CAST(STRFTIME('%m', DATETIME(sale_date)) AS INTEGER) - 1 ) % 3 ) AS TEXT) || ' months' ) AS quarter, - MAX(customers.state) AS anything_state, - SUM(sales.sale_price) AS sum_sale_price - FROM main.sales AS sales - JOIN main.customers AS customers - ON customers._id = sales.customer_id + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales WHERE - CAST(STRFTIME('%Y', sales.sale_date) AS INTEGER) = 2023 + CAST(STRFTIME('%Y', sale_date) AS INTEGER) = 2023 GROUP BY - sales.customer_id, - 1 + 1, + 2 ), _t1 AS ( SELECT - anything_state, - quarter, - SUM(sum_sale_price) AS sum_sum_sale_price - FROM _t2 + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id GROUP BY 1, 2 ) SELECT quarter, - anything_state AS customer_state, + state AS customer_state, 
sum_sum_sale_price AS total_sales FROM _t1 WHERE diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql index 9c3b1c7d9..3ae3d4eba 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_ansi.sql @@ -10,15 +10,6 @@ WITH _s0 AS ( pr_id, pr_release FROM main.products -), _t1 AS ( - SELECT - COUNT(*) AS n_rows_1, - ANY_VALUE(_s1.pr_release) AS pr_release - FROM main.devices AS devices - JOIN _s1 AS _s1 - ON _s1.pr_id = devices.de_product_id - GROUP BY - devices.de_product_id ), _s6 AS ( SELECT EXTRACT(YEAR FROM CAST(_s1.pr_release AS DATETIME)) AS year_pr_release, diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql index 5de1ba7f3..473886a2e 100644 --- a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_sqlite.sql @@ -10,15 +10,6 @@ WITH _s0 AS ( pr_id, pr_release FROM main.products -), _t1 AS ( - SELECT - COUNT(*) AS n_rows_1, - MAX(_s1.pr_release) AS pr_release - FROM main.devices AS devices - JOIN _s1 AS _s1 - ON _s1.pr_id = devices.de_product_id - GROUP BY - devices.de_product_id ), _s6 AS ( SELECT CAST(STRFTIME('%Y', _s1.pr_release) AS INTEGER) AS year_pr_release, From f130bc3f7bd89c4a8e6a1db9e63f22ea0838037d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 18 Sep 2025 13:58:22 -0400 Subject: [PATCH 96/97] WIP --- pydough/conversion/join_agg_transpose.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pydough/conversion/join_agg_transpose.py b/pydough/conversion/join_agg_transpose.py index 6f32751b1..0a46234ba 100644 --- a/pydough/conversion/join_agg_transpose.py +++ b/pydough/conversion/join_agg_transpose.py @@ 
-14,6 +14,7 @@ Join, JoinCardinality, JoinType, + Project, RelationalExpression, RelationalNode, RelationalRoot, @@ -139,6 +140,10 @@ def join_aggregate_transpose( ): return None + # A mapping that will be used to map every expression with regards to + # the original join looking at its input expressions to what the + # expression will be in the output columns of the new aggregate + new_join_columns: dict[str, RelationalExpression] = {} new_aggregate_aggs: dict[str, CallExpression] = {} new_aggregate_keys: dict[str, RelationalExpression] = {} @@ -150,6 +155,10 @@ def join_aggregate_transpose( [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input] ) + project_columns: dict[str, RelationalExpression] = {} + + assert False + # TODO: FINISH THIS return None @@ -167,7 +176,7 @@ def join_aggregate_transpose( new_join, new_aggregate_keys, new_aggregate_aggs ) - return new_aggregate + return Project(new_aggregate, project_columns) def pull_joins_after_aggregates(node: RelationalRoot) -> RelationalNode: From 401c1bcb8f5fcc88631d744711a0d4ec456a18c3 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 6 Oct 2025 15:17:37 -0400 Subject: [PATCH 97/97] Resolving conflicts --- tests/test_plan_refsols/cryptbank_agg_03.txt | 9 ----- tests/test_plan_refsols/cryptbank_agg_04.txt | 5 --- tests/test_plan_refsols/cryptbank_agg_05.txt | 10 ----- .../cryptbank_analysis_01.txt | 22 ----------- .../cryptbank_analysis_02.txt | 22 ----------- .../cryptbank_analysis_03.txt | 39 ------------------- .../cryptbank_analysis_04.txt | 14 ------- .../cryptbank_filter_count_11.txt | 14 ------- .../cryptbank_filter_count_12.txt | 5 --- .../cryptbank_filter_count_13.txt | 5 --- .../cryptbank_filter_count_15.txt | 10 ----- .../cryptbank_filter_count_16.txt | 10 ----- .../cryptbank_filter_count_28.txt | 7 ---- .../cryptbank_general_join_01.txt | 16 -------- .../cryptbank_general_join_02.txt | 11 ------ 15 files changed, 199 deletions(-) delete mode 100644 
tests/test_plan_refsols/cryptbank_agg_03.txt delete mode 100644 tests/test_plan_refsols/cryptbank_agg_04.txt delete mode 100644 tests/test_plan_refsols/cryptbank_agg_05.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_01.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_02.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_03.txt delete mode 100644 tests/test_plan_refsols/cryptbank_analysis_04.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_11.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_12.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_13.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_15.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_16.txt delete mode 100644 tests/test_plan_refsols/cryptbank_filter_count_28.txt delete mode 100644 tests/test_plan_refsols/cryptbank_general_join_01.txt delete mode 100644 tests/test_plan_refsols/cryptbank_general_join_02.txt diff --git a/tests/test_plan_refsols/cryptbank_agg_03.txt b/tests/test_plan_refsols/cryptbank_agg_03.txt deleted file mode 100644 index 464b8634c..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_03.txt +++ /dev/null @@ -1,9 +0,0 @@ -ROOT(columns=[('account_type', a_type), ('balance', a_balance), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname))], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[a_type], order=[(a_balance):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) -<<<<<<< HEAD - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) -======= - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_agg_04.txt b/tests/test_plan_refsols/cryptbank_agg_04.txt deleted file mode 100644 index 8d096df95..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_04.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[]) - JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_a_balance': t1.sum_a_balance}) - SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key}) - AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_a_balance': SUM(a_balance)}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey}) diff --git a/tests/test_plan_refsols/cryptbank_agg_05.txt b/tests/test_plan_refsols/cryptbank_agg_05.txt deleted file mode 100644 index f7672b643..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_05.txt +++ /dev/null @@ -1,10 +0,0 @@ -ROOT(columns=[('avg_secs', ROUND(avg_expr, 2:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'avg_expr': AVG(DATEDIFF('seconds':string, a_open_ts, min_t_ts))}) -<<<<<<< HEAD - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'a_open_ts': t0.a_open_ts, 'min_t_ts': t1.min_t_ts}) -======= - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_open_ts': t0.a_open_ts, 'min_t_ts': 
t1.min_t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'min_t_ts': MIN(t_ts)}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_01.txt b/tests/test_plan_refsols/cryptbank_analysis_01.txt deleted file mode 100644 index c23ce20f3..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_01.txt +++ /dev/null @@ -1,22 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_sends', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_02.txt b/tests/test_plan_refsols/cryptbank_analysis_02.txt deleted file mode 100644 index 3525b5c77..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_02.txt +++ /dev/null @@ -1,22 +0,0 @@ -ROOT(columns=[('key', 
c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_recvs', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_03.txt b/tests/test_plan_refsols/cryptbank_analysis_03.txt deleted file mode 100644 index 363340648..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_03.txt +++ /dev/null @@ -1,39 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_sends', DEFAULT_TO(agg_1, 0:numeric)), ('first_recvs', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) + DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.sum_t_amount, 'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - JOIN(condition=t0.c_key == t1.a_custkey, 
type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.sum_t_amount, 'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) -<<<<<<< HEAD - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) -======= - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_04.txt b/tests/test_plan_refsols/cryptbank_analysis_04.txt deleted file mode 100644 index e8220860e..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_04.txt +++ /dev/null @@ -1,14 +0,0 @@ -ROOT(columns=[('key', a_key), ('cust_name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('n_trans', n_rows)], orderings=[(a_key):asc_first]) -<<<<<<< HEAD - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) -======= - JOIN(condition=t0.a_key == 
t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=MONOTONIC(1980:numeric, YEAR(c_birthday), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'n_rows': COUNT()}) - FILTER(condition=t_amount > 9000.0:numeric, columns={'t_sourceaccount': t_sourceaccount}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_sourceaccount': t_sourceaccount}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_11.txt b/tests/test_plan_refsols/cryptbank_filter_count_11.txt deleted file mode 100644 index 40a01c208..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_11.txt +++ /dev/null @@ -1,14 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) -======= - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - 
JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) ->>>>>>> main - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=c_fname == 'alice':string, columns={'c_key': c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_12.txt b/tests/test_plan_refsols/cryptbank_filter_count_12.txt deleted file mode 100644 index 03bc19679..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_12.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=YEAR(t0.t_ts) == YEAR(t1.a_open_ts) & t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_13.txt b/tests/test_plan_refsols/cryptbank_filter_count_13.txt deleted file mode 100644 index 836560846..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_13.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.t_ts < DATETIME(t1.a_open_ts, '+2 years':string) & t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_destaccount': t_destaccount, 't_ts': t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_15.txt b/tests/test_plan_refsols/cryptbank_filter_count_15.txt deleted file mode 100644 index b6667b78a..000000000 --- 
a/tests/test_plan_refsols/cryptbank_filter_count_15.txt +++ /dev/null @@ -1,10 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=SEMI, columns={}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=a_type == 'retirement':string, columns={'a_custkey': a_custkey}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) -======= - SCAN(table=CRBNK.CUSTOMERS, columns={}) ->>>>>>> main diff --git a/tests/test_plan_refsols/cryptbank_filter_count_16.txt b/tests/test_plan_refsols/cryptbank_filter_count_16.txt deleted file mode 100644 index 373780af6..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_16.txt +++ /dev/null @@ -1,10 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.c_key == t1.a_custkey, type=SEMI, columns={}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=a_type != 'checking':string & a_type != 'savings':string, columns={'a_custkey': a_custkey}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) -======= - SCAN(table=CRBNK.CUSTOMERS, columns={}) ->>>>>>> main diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28.txt b/tests/test_plan_refsols/cryptbank_filter_count_28.txt deleted file mode 100644 index 4e8ab891b..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_28.txt +++ /dev/null @@ -1,7 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(a_open_ts) < 2020:numeric & a_balance >= 5000:numeric & a_type == 'retirement':string | a_type == 'savings':string, columns={'a_custkey': a_custkey}) - 
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) - FILTER(condition=CONTAINS(c_email, 'outlook':string) | CONTAINS(c_email, 'gmail':string), columns={'c_key': c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01.txt b/tests/test_plan_refsols/cryptbank_general_join_01.txt deleted file mode 100644 index 6cdffeadb..000000000 --- a/tests/test_plan_refsols/cryptbank_general_join_01.txt +++ /dev/null @@ -1,16 +0,0 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) -<<<<<<< HEAD - JOIN(condition=t0.b_key == t1.b_key & t0.c_key == t1.c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) -======= - JOIN(condition=t0.b_key == t1.b_key & t0.c_key == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) ->>>>>>> main - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t1.c_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - AGGREGATE(keys={'b_key': b_key, 'c_key': c_key}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_key == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t1.c_addr, 
-8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02.txt b/tests/test_plan_refsols/cryptbank_general_join_02.txt deleted file mode 100644 index 4a22534b7..000000000 --- a/tests/test_plan_refsols/cryptbank_general_join_02.txt +++ /dev/null @@ -1,11 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) -<<<<<<< HEAD - JOIN(condition=t0.a_custkey == t1.c_key & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t0.c_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) -======= - SCAN(table=CRBNK.ACCOUNTS, columns={}) ->>>>>>> main