|
66 | 66 | HybridCollectionAccess,
|
67 | 67 | HybridFilter,
|
68 | 68 | HybridLimit,
|
| 69 | + HybridNoop, |
69 | 70 | HybridOperation,
|
70 | 71 | HybridPartition,
|
71 | 72 | HybridPartitionChild,
|
@@ -421,6 +422,7 @@ def populate_children(
|
421 | 422 | if (
|
422 | 423 | name in hybrid.ancestral_mapping
|
423 | 424 | or name in hybrid.pipeline[-1].terms
|
| 425 | + or subtree.ancestral_mapping[name] == 0 |
424 | 426 | ):
|
425 | 427 | continue
|
426 | 428 | hybrid_back_expr = self.make_hybrid_expr(
|
@@ -867,97 +869,6 @@ def rewrite_quantile_call(
|
867 | 869 |
|
868 | 870 | return max_call
|
869 | 871 |
|
870 |
| - def make_hybrid_correl_expr( |
871 |
| - self, |
872 |
| - back_expr: BackReferenceExpression, |
873 |
| - collection: PyDoughCollectionQDAG, |
874 |
| - steps_taken_so_far: int, |
875 |
| - down_shift: int, |
876 |
| - ) -> HybridCorrelExpr: |
877 |
| - """ |
878 |
| - Converts a BACK reference into a correlated reference when the number |
879 |
| - of BACK levels exceeds the height of the current subtree. |
880 |
| -
|
881 |
| - Args: |
882 |
| - `back_expr`: the original BACK reference to be converted. |
883 |
| - `collection`: the collection at the top of the current subtree, |
884 |
| - before we have run out of BACK levels to step up out of. |
885 |
| - `steps_taken_so_far`: the number of steps already taken to step |
886 |
| - up from the BACK node. This is needed so we know how many steps |
887 |
| - still need to be taken upward once we have stepped out of the child |
888 |
| - subtree back into the parent subtree. |
889 |
| - `down_shift`: a factor that should be subtracted from the final |
890 |
| - back shift when creating a term in the form CORREL(BACK(n).x), to |
891 |
| - account for edge cases involving PARTITION nodes. Starts as 0, and |
892 |
| - is incremented as-needed. |
893 |
| - """ |
894 |
| - if len(self.stack) == 0: |
895 |
| - raise ValueError("Back reference steps too far back") |
896 |
| - # Identify the parent subtree that the BACK reference is stepping back |
897 |
| - # into, out of the child. |
898 |
| - parent_tree: HybridTree = self.stack.pop() |
899 |
| - remaining_steps_back: int = back_expr.back_levels - steps_taken_so_far - 1 |
900 |
| - parent_result: HybridExpr |
901 |
| - new_expr: PyDoughExpressionQDAG |
902 |
| - # Special case: stepping out of the data argument of PARTITION back |
903 |
| - # into its ancestor. For example: |
904 |
| - # TPCH.CALCULATE(x=...).PARTITION(data.WHERE(y > BACK(1).x), ...) |
905 |
| - partition_edge_case: bool = len(parent_tree.pipeline) == 1 and isinstance( |
906 |
| - parent_tree.pipeline[0], HybridPartition |
907 |
| - ) |
908 |
| - if partition_edge_case: |
909 |
| - next_hybrid: HybridTree |
910 |
| - if parent_tree.parent is not None: |
911 |
| - # If the parent tree has a parent, then we can step back |
912 |
| - # into the parent tree's parent, which is the context for |
913 |
| - # the partition. |
914 |
| - next_hybrid = parent_tree.parent |
915 |
| - else: |
916 |
| - assert len(self.stack) > 0, "Back reference steps too far back" |
917 |
| - next_hybrid = self.stack[-1] |
918 |
| - # Treat the partition's parent as the context for the back |
919 |
| - # to step into, as opposed to the partition itself (so the back |
920 |
| - # levels are consistent) |
921 |
| - self.stack.append(next_hybrid) |
922 |
| - parent_result = self.make_hybrid_correl_expr( |
923 |
| - back_expr, collection, steps_taken_so_far, down_shift + 1 |
924 |
| - ).expr |
925 |
| - self.stack.pop() |
926 |
| - elif remaining_steps_back == 0: |
927 |
| - # If there are no more steps back to be made, then the correlated |
928 |
| - # reference is to a reference from the current context. |
929 |
| - if back_expr.term_name in parent_tree.ancestral_mapping: |
930 |
| - new_expr = BackReferenceExpression( |
931 |
| - collection, |
932 |
| - back_expr.term_name, |
933 |
| - parent_tree.ancestral_mapping[back_expr.term_name] - down_shift, |
934 |
| - ) |
935 |
| - parent_result = self.make_hybrid_expr(parent_tree, new_expr, {}, False) |
936 |
| - elif back_expr.term_name in parent_tree.pipeline[-1].terms: |
937 |
| - parent_name: str = parent_tree.pipeline[-1].renamings.get( |
938 |
| - back_expr.term_name, back_expr.term_name |
939 |
| - ) |
940 |
| - parent_result = HybridRefExpr(parent_name, back_expr.pydough_type) |
941 |
| - else: |
942 |
| - raise ValueError( |
943 |
| - f"Back reference to {back_expr.term_name} not found in parent" |
944 |
| - ) |
945 |
| - else: |
946 |
| - # Otherwise, a back reference needs to be made from the current |
947 |
| - # collection a number of steps back based on how many steps still |
948 |
| - # need to be taken, and it must be recursively converted to a |
949 |
| - # hybrid expression that gets wrapped in a correlated reference. |
950 |
| - new_expr = BackReferenceExpression( |
951 |
| - collection, back_expr.term_name, remaining_steps_back |
952 |
| - ) |
953 |
| - parent_result = self.make_hybrid_expr(parent_tree, new_expr, {}, False) |
954 |
| - # Restore parent_tree back onto the stack, since evaluating `back_expr` |
955 |
| - # does not change the program's current placement in the subtrees. |
956 |
| - self.stack.append(parent_tree) |
957 |
| - # Create the correlated reference to the expression with regards to |
958 |
| - # the parent tree, which could also be a correlated expression. |
959 |
| - return HybridCorrelExpr(parent_result) |
960 |
| - |
961 | 872 | def add_unique_terms(
|
962 | 873 | self,
|
963 | 874 | hybrid: HybridTree,
|
@@ -1046,6 +957,74 @@ def add_unique_terms(
|
1046 | 957 | child_idx,
|
1047 | 958 | )
|
1048 | 959 |
|
def translate_back_reference(
    self, hybrid: HybridTree, expr: BackReferenceExpression
) -> HybridExpr:
    """
    Perform the logic used to translate a BACK reference in QDAG into a
    back reference in hybrid, or a correlated reference if the back
    reference steps back further than the height of the current hybrid
    tree.

    `self.stack` is only read, never modified, so it is left intact both
    when a result is returned and when the lookup fails.

    Args:
        `hybrid`: the hybrid tree that should be used to derive the
        translation of `expr`, as it is the context in which the `expr`
        will live.
        `expr`: the BACK reference to be converted.

    Returns:
        The HybridExpr node corresponding to `expr`: a plain reference
        when the name is found zero levels back, otherwise a back
        reference, wrapped in one CORREL() layer per tree that had to be
        stepped out of.

    Raises:
        `ValueError`: if neither `hybrid` nor any tree in `self.stack`
        has the name in its ancestral mapping within its height bounds.
    """
    term_name: str = expr.term_name
    back_levels: int = 0
    correl_levels: int = 0
    ancestor_tree: HybridTree = hybrid
    # Start with the current context and hunt for a tree that has the name
    # in its ancestral mapping at a depth within that tree's height. If the
    # current tree does not qualify, look at the trees on the stack, last
    # entry first. The position of the matching tree in this search order
    # is the number of trees stepped out of, which is how many CORREL()
    # layers the final expression needs.
    for correl_levels, ancestor_tree in enumerate((hybrid, *reversed(self.stack))):
        if (
            term_name in ancestor_tree.ancestral_mapping
            and ancestor_tree.ancestral_mapping[term_name]
            < ancestor_tree.get_tree_height()
        ):
            back_levels = ancestor_tree.ancestral_mapping[term_name]
            break
    else:
        # Every candidate was inspected without a match.
        raise ValueError("Cannot find ancestor with name " + str(expr))

    # Walk up to the level that the mapping points at, then apply any
    # renaming recorded by the last operation of that level's pipeline.
    for _ in range(back_levels):
        assert ancestor_tree.parent is not None
        ancestor_tree = ancestor_tree.parent
    expr_name: str = ancestor_tree.pipeline[-1].renamings.get(term_name, term_name)

    # The final expression is a regular or back reference depending
    # on how many back levels it is from the identified ancestor.
    result: HybridExpr
    if back_levels == 0:
        result = HybridRefExpr(expr_name, expr.pydough_type)
    else:
        result = HybridBackRefExpr(expr_name, back_levels, expr.pydough_type)

    # Then, wrap it in the necessary number of CORREL() layers.
    for _ in range(correl_levels):
        result = HybridCorrelExpr(result)

    return result
| 1027 | + |
1049 | 1028 | def make_hybrid_expr(
|
1050 | 1029 | self,
|
1051 | 1030 | hybrid: HybridTree,
|
@@ -1074,7 +1053,6 @@ def make_hybrid_expr(
|
1074 | 1053 | child_connection: HybridConnection
|
1075 | 1054 | args: list[HybridExpr] = []
|
1076 | 1055 | hybrid_arg: HybridExpr
|
1077 |
| - ancestor_tree: HybridTree |
1078 | 1056 | collection: PyDoughCollectionQDAG
|
1079 | 1057 | match expr:
|
1080 | 1058 | case PartitionKey():
|
@@ -1105,30 +1083,8 @@ def make_hybrid_expr(
|
1105 | 1083 | else:
|
1106 | 1084 | return HybridRefExpr(expr.term_name, expr.pydough_type)
|
1107 | 1085 | case BackReferenceExpression():
|
1108 |
| - # A reference to an expression from an ancestor becomes a |
1109 |
| - # reference to one of the terms of a parent level of the hybrid |
1110 |
| - # tree. If the BACK goes far enough that it must step outside |
1111 |
| - # a child subtree into the parent, a correlated reference is |
1112 |
| - # created. |
1113 |
| - ancestor_tree = hybrid |
1114 |
| - true_steps_back: int = 0 |
1115 |
| - # Keep stepping backward until `expr.back_levels` non-hidden |
1116 |
| - # steps have been taken. |
1117 |
| - collection = expr.collection |
1118 |
| - while true_steps_back < expr.back_levels: |
1119 |
| - assert collection.ancestor_context is not None |
1120 |
| - collection = collection.ancestor_context |
1121 |
| - if ancestor_tree.parent is None: |
1122 |
| - return self.make_hybrid_correl_expr( |
1123 |
| - expr, collection, true_steps_back, 0 |
1124 |
| - ) |
1125 |
| - ancestor_tree = ancestor_tree.parent |
1126 |
| - if not ancestor_tree.is_hidden_level: |
1127 |
| - true_steps_back += 1 |
1128 |
| - expr_name = ancestor_tree.pipeline[-1].renamings.get( |
1129 |
| - expr.term_name, expr.term_name |
1130 |
| - ) |
1131 |
| - return HybridBackRefExpr(expr_name, expr.back_levels, expr.pydough_type) |
| 1086 | + return self.translate_back_reference(hybrid, expr) |
| 1087 | + |
1132 | 1088 | case Reference():
|
1133 | 1089 | if hybrid.ancestral_mapping.get(expr.term_name, 0) > 0:
|
1134 | 1090 | collection = expr.collection
|
@@ -1497,6 +1453,8 @@ def make_hybrid_tree(
|
1497 | 1453 | hybrid.pipeline[-1].orderings,
|
1498 | 1454 | )
|
1499 | 1455 | )
|
| 1456 | + for name in new_expressions: |
| 1457 | + hybrid.ancestral_mapping[name] = 0 |
1500 | 1458 | return hybrid
|
1501 | 1459 | case Singular():
|
1502 | 1460 | # a Singular node is just used to annotate the preceding context
|
@@ -1551,6 +1509,11 @@ def make_hybrid_tree(
|
1551 | 1509 | ].subtree
|
1552 | 1510 | partition_child.agg_keys = key_exprs
|
1553 | 1511 | partition_child.join_keys = [(k, k) for k in key_exprs]
|
| 1512 | + # Add a dummy no-op after the partition to ensure a |
| 1513 | + # buffer in the max_steps for other children. |
| 1514 | + successor_hybrid.add_operation( |
| 1515 | + HybridNoop(successor_hybrid.pipeline[-1]) |
| 1516 | + ) |
1554 | 1517 | return successor_hybrid
|
1555 | 1518 | case OrderBy() | TopK():
|
1556 | 1519 | hybrid = self.make_hybrid_tree(
|
@@ -1630,6 +1593,11 @@ def make_hybrid_tree(
|
1630 | 1593 | successor_hybrid.children[
|
1631 | 1594 | partition_child_idx
|
1632 | 1595 | ].subtree.agg_keys = key_exprs
|
| 1596 | + # Add a dummy no-op after the partition to ensure a |
| 1597 | + # buffer in the max_steps for other children. |
| 1598 | + successor_hybrid.add_operation( |
| 1599 | + HybridNoop(successor_hybrid.pipeline[-1]) |
| 1600 | + ) |
1633 | 1601 | case GlobalContext():
|
1634 | 1602 | # This is a special case where the child access
|
1635 | 1603 | # is a global context, which means that the child is
|
|
0 commit comments