diff --git a/pydough/conversion/masking_shuttles.py b/pydough/conversion/masking_shuttles.py new file mode 100644 index 000000000..53ce2097e --- /dev/null +++ b/pydough/conversion/masking_shuttles.py @@ -0,0 +1,147 @@ +""" +Logic for replacing `UNMASK(x) == literal` (and similar expressions) with +`x == MASK(literal)`. +""" + +__all__ = ["MaskLiteralComparisonShuttle"] + +import pydough.pydough_operators as pydop +from pydough.relational import ( + CallExpression, + LiteralExpression, + RelationalExpression, + RelationalExpressionShuttle, +) +from pydough.types import ArrayType, PyDoughType, UnknownType + + +class MaskLiteralComparisonShuttle(RelationalExpressionShuttle): + """ + A shuttle that recursively performs the following replacements: + - `UNMASK(x) == literal` -> `x == MASK(literal)` + - `literal == UNMASK(x)` -> `x == MASK(literal)` + - `UNMASK(x) != literal` -> `x != MASK(literal)` + - `literal != UNMASK(x)` -> `x != MASK(literal)` + - `UNMASK(x) IN (literal1, ..., literalN)` -> `x IN (MASK(literal1), ..., MASK(literalN))` + """ + + def rewrite_masked_literal_comparison( + self, + original_call: CallExpression, + call_arg: CallExpression, + literal_arg: LiteralExpression, + ) -> CallExpression: + """ + Performs a rewrite of a comparison between a call to UNMASK and a + literal, which is either equality, inequality, or containment. + + Args: + `original_call`: The original call expression representing the + comparison. + `call_arg`: The argument to the comparison that is a call to + UNMASK, which is treated as the left-hand side of the comparison. + `literal_arg`: The argument to the comparison that is a literal, + which is treated as the right-hand side of the comparison. + + Returns: + A new call expression representing the rewritten comparison, or + the original call expression if no rewrite was performed. + """ + + # Verify that the call argument is indeed an UNMASK operation, otherwise + # fall back to the original. 
+ if ( + not isinstance(call_arg.op, pydop.MaskedExpressionFunctionOperator) + or not call_arg.op.is_unmask + ): + return original_call + + masked_literal: RelationalExpression + + if original_call.op in (pydop.EQU, pydop.NEQ): + # If the operation is equality or inequality, we can simply wrap the + # literal in a call to MASK by toggling is_unmask to False. + masked_literal = CallExpression( + pydop.MaskedExpressionFunctionOperator( + call_arg.op.masking_metadata, False + ), + call_arg.data_type, + [literal_arg], + ) + elif original_call.op == pydop.ISIN and isinstance( + literal_arg.value, (list, tuple) + ): + # If the operation is containment, and the literal is a list/tuple, + # we need to build a list by wrapping each element of the tuple in + # a MASK call. + inner_type: PyDoughType + if isinstance(literal_arg.data_type, ArrayType): + inner_type = literal_arg.data_type.elem_type + else: + inner_type = UnknownType() + masked_literal = LiteralExpression( + [ + CallExpression( + pydop.MaskedExpressionFunctionOperator( + call_arg.op.masking_metadata, False + ), + call_arg.data_type, + [LiteralExpression(v, inner_type)], + ) + for v in literal_arg.value + ], + original_call.data_type, + ) + else: + # Otherwise, return the original. + return original_call + + # Now that we have the masked literal, we can return a new call + # expression with the same operators as before, but where the left hand + # side argument is the expression that was being unmasked, and the right + # hand side is the masked literal. + return CallExpression( + original_call.op, + original_call.data_type, + [call_arg.inputs[0], masked_literal], + ) + + def visit_call_expression( + self, call_expression: CallExpression + ) -> RelationalExpression: + # If the call expression is equality or inequality, dispatch to the + # rewrite logic if one argument is a call expression and the other is + # a literal. 
+ if call_expression.op in (pydop.EQU, pydop.NEQ): + if isinstance(call_expression.inputs[0], CallExpression) and isinstance( + call_expression.inputs[1], LiteralExpression + ): + call_expression = self.rewrite_masked_literal_comparison( + call_expression, + call_expression.inputs[0], + call_expression.inputs[1], + ) + if isinstance(call_expression.inputs[1], CallExpression) and isinstance( + call_expression.inputs[0], LiteralExpression + ): + call_expression = self.rewrite_masked_literal_comparison( + call_expression, + call_expression.inputs[1], + call_expression.inputs[0], + ) + + # If the call expression is containment, dispatch to the rewrite logic + # if the first argument is a call expression and the second is a + # literal. + if ( + call_expression.op == pydop.ISIN + and isinstance(call_expression.inputs[0], CallExpression) + and isinstance(call_expression.inputs[1], LiteralExpression) + ): + call_expression = self.rewrite_masked_literal_comparison( + call_expression, call_expression.inputs[0], call_expression.inputs[1] + ) + + # Regardless of whether the rewrite occurred or not, invoke the regular + # logic which will recursively transform the arguments. 
+ return super().visit_call_expression(call_expression) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 0f0da3ffc..1216e0aa3 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -6,6 +6,7 @@ __all__ = ["convert_ast_to_relational"] +import os from collections.abc import Iterable from dataclasses import dataclass @@ -85,6 +86,7 @@ ) from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree +from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects from .projection_pullup import pullup_projections from .relational_simplification import simplify_expressions @@ -1663,6 +1665,10 @@ def convert_ast_to_relational( # Invoke the optimization procedures on the result to clean up the tree. additional_shuttles: list[RelationalExpressionShuttle] = [] + # Add the mask literal comparison shuttle if the environment variable + # PYDOUGH_ENABLE_MASK_REWRITES is set to 1. + if os.getenv("PYDOUGH_ENABLE_MASK_REWRITES") == "1": + additional_shuttles.append(MaskLiteralComparisonShuttle()) optimized_result: RelationalRoot = optimize_relational_tree( raw_result, configs, additional_shuttles ) diff --git a/pydough/sqlglot/sqlglot_relational_expression_visitor.py b/pydough/sqlglot/sqlglot_relational_expression_visitor.py index 2aa9b85dd..264dad668 100644 --- a/pydough/sqlglot/sqlglot_relational_expression_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_expression_visitor.py @@ -312,9 +312,22 @@ def visit_window_expression(self, window_expression: WindowCallExpression) -> No def visit_literal_expression(self, literal_expression: LiteralExpression) -> None: # Note: This assumes each literal has an associated type that can be parsed # and types do not represent implicit casts. 
- literal: SQLGlotExpression = sqlglot_expressions.convert( - literal_expression.value - ) + literal: SQLGlotExpression + if isinstance(literal_expression.value, (tuple, list)): + # If the literal is a list or tuple, convert each element + # individually and create an array literal. + elements: list[SQLGlotExpression] = [] + for element in literal_expression.value: + element_expr: SQLGlotExpression + if isinstance(element, RelationalExpression): + element.accept(self) + element_expr = self._stack.pop() + else: + element_expr = sqlglot_expressions.convert(element) + elements.append(element_expr) + literal = sqlglot_expressions.Array(expressions=elements) + else: + literal = sqlglot_expressions.convert(literal_expression.value) # Special handling: insert cast calls for ansi casting of date/time # instead of relying on SQLGlot conversion functions. This is because diff --git a/tests/test_metadata/sf_masked_examples.json b/tests/test_metadata/sf_masked_examples.json index 1ec103a51..4232f56bd 100644 --- a/tests/test_metadata/sf_masked_examples.json +++ b/tests/test_metadata/sf_masked_examples.json @@ -218,7 +218,7 @@ "data type": "string", "server masked": true, "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", - "protect protocol": "PTY_PROTECT({}, 'deName)", + "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The first name of the customer", "sample values": ["Julie", "Melissa", "Gary"], "synonyms": ["customer first name", "given name"] diff --git a/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt index 4f1b912cb..f0cb980e1 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(LOWER([c_lname])) == 'lee':string, columns={}) + 
FILTER(condition=c_lname == MASK::(UPPER(['lee':string])), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt index 411ff99fe..7558820f5 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(LOWER([c_lname])) != 'lee':string, columns={}) + FILTER(condition=c_lname != MASK::(UPPER(['lee':string])), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt index f881e5028..96ddb590e 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(UNMASK::(LOWER([c_lname])), ['lee', 'smith', 'rodriguez']:array[unknown]), columns={}) + FILTER(condition=ISIN(c_lname, [Call(op=MASK, inputs=[Literal(value='lee', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='smith', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='rodriguez', type=UnknownType())], return_type=StringType())]:bool), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt index afcecfd28..a8ebb4a29 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt @@ 
-1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(ISIN(UNMASK::(LOWER([c_lname])), ['lee', 'smith', 'rodriguez']:array[unknown])), columns={}) + FILTER(condition=NOT(ISIN(c_lname, [Call(op=MASK, inputs=[Literal(value='lee', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='smith', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='rodriguez', type=UnknownType())], return_type=StringType())]:bool)), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt index f29a000d2..4d56d54dc 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) == '1985-04-12':string, columns={}) + FILTER(condition=c_birthday == MASK::(DATE(['1985-04-12':string], '-472 days')), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt index 6fc64fe4b..02a4823f6 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt @@ -4,5 +4,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - 
FILTER(condition=UNMASK::(LOWER([c_fname])) == 'alice':string, columns={'c_key': c_key}) + FILTER(condition=c_fname == MASK::(UPPER(['alice':string])), columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt index 7ffffccd0..de414aa9c 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt @@ -2,5 +2,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=SEMI, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'retirement':string, columns={'a_custkey': a_custkey}) + FILTER(condition=a_type == MASK::(SUBSTRING(['retirement':string], 2) || SUBSTRING(['retirement':string], 1, 1)), columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt index 9c056e0ad..1b2fcbc78 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt @@ -2,5 +2,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=SEMI, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) != 'checking':string & UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) != 'savings':string, 
columns={'a_custkey': a_custkey}) + FILTER(condition=a_type != MASK::(SUBSTRING(['checking':string], 2) || SUBSTRING(['checking':string], 1, 1)) & a_type != MASK::(SUBSTRING(['savings':string], 2) || SUBSTRING(['savings':string], 1, 1)), columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt index 473a4d7f7..d5cb152e8 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) == '1991-11-15':string, columns={}) + FILTER(condition=c_birthday == MASK::(DATE(['1991-11-15':string], '-472 days')), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt index 02f3fd20d..5face6b66 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) | UNMASK::(DATE([c_birthday], '+472 days')) != '1991-11-15':string, columns={}) + FILTER(condition=ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) | c_birthday != MASK::(DATE(['1991-11-15':string], '-472 days')), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt index 540da15bd..34e03d1e0 100644 --- 
a/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')) == '555-123-456':string, columns={}) + FILTER(condition=c_phone == MASK::(REPLACE(REPLACE(REPLACE(['555-123-456':string], '0', '*'), '9', '0'), '*', '9')), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt index eec9ddd9e..6aac164ff 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))) & UNMASK::(LOWER([c_lname])) != 'lopez':string & ENDSWITH(UNMASK::(LOWER([c_fname])), 'a':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 'e':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 's':string) | ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ENDSWITH(UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')), '5':string), columns={}) + FILTER(condition=PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))) & c_lname != MASK::(UPPER(['lopez':string])) & ENDSWITH(UNMASK::(LOWER([c_fname])), 'a':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 'e':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 's':string) | ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ENDSWITH(UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), 
'0', '9'), '*', '0')), '5':string), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_birthday': c_birthday, 'c_fname': c_fname, 'c_lname': c_lname, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt index 3a58fa548..9526425f5 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 2020:numeric & UNMASK::(SQRT([a_balance])) >= 5000:numeric & UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'retirement':string | UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'savings':string, columns={'a_custkey': a_custkey}) + FILTER(condition=YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 2020:numeric & UNMASK::(SQRT([a_balance])) >= 5000:numeric & a_type == MASK::(SUBSTRING(['retirement':string], 2) || SUBSTRING(['retirement':string], 1, 1)) | a_type == MASK::(SUBSTRING(['savings':string], 2) || SUBSTRING(['savings':string], 1, 1)), columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'outlook':string) | CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail':string), columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git 
a/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt b/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt index 07ceec906..f3451d2a7 100644 --- a/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt +++ b/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([t0.customerid])) == UNMASK::(PTY_UNPROTECT([t1.customerid], 'deAccount')), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([currency])) != 'GBP':string & balance < 20000:numeric, columns={'customerid': customerid}) + FILTER(condition=currency != MASK::(PTY_PROTECT(['GBP':string], 'deAccount')) & balance < 20000:numeric, columns={'customerid': customerid}) SCAN(table=bodo.fsi.accounts, columns={'balance': balance, 'currency': currency, 'customerid': customerid}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([state], 'deAddress')) == 'California':string, columns={'customerid': customerid}) + FILTER(condition=state == MASK::(PTY_PROTECT(['California':string], 'deAddress')), columns={'customerid': customerid}) SCAN(table=bodo.fsi.protected_customers, columns={'customerid': customerid, 'state': state}) diff --git a/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt b/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt index 12d0083c4..db5c10d1f 100644 --- a/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt +++ b/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([t0.customerid])) == UNMASK::(PTY_UNPROTECT([t1.customerid], 'deAccount')), type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(UNMASK::(PTY_UNPROTECT_DOB([createddate]))) <= 2022:numeric & ISIN(UNMASK::(PTY_UNPROTECT_ACCOUNT([currency])), ['USD', 'GPB', 'EUR', 'JPY', 'AUD']:array[unknown]), columns={'customerid': customerid}) + FILTER(condition=YEAR(UNMASK::(PTY_UNPROTECT_DOB([createddate]))) <= 2022:numeric & ISIN(currency, [Call(op=MASK, inputs=[Literal(value='USD', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='GPB', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='EUR', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='JPY', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='AUD', type=UnknownType())], return_type=StringType())]:bool), columns={'customerid': customerid}) SCAN(table=bodo.fsi.accounts, columns={'createddate': createddate, 'currency': currency, 'customerid': customerid}) - FILTER(condition=ISIN(UNMASK::(PTY_UNPROTECT([state], 'deAddress')), ['Georgia', 'Alabama', 'Mississippi', 'Arkansas', 'Louisiana', 'Florida', 'South Carolina', 'North Carolina', 'Texas', 'Tennessee', 'Missouri']:array[unknown]) & NOT(ISIN(UNMASK::(PTY_UNPROTECT([firstname], 'deName')), ['Jennifer', 'Julio', 'Johnson', 'Jameson', 'Michael', 'Robert']:array[unknown])), columns={'customerid': customerid}) + FILTER(condition=ISIN(state, [Call(op=MASK, inputs=[Literal(value='Georgia', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Alabama', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Mississippi', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Arkansas', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Louisiana', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Florida', 
type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='South Carolina', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='North Carolina', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Texas', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Tennessee', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Missouri', type=UnknownType())], return_type=StringType())]:bool) & NOT(ISIN(firstname, [Call(op=MASK, inputs=[Literal(value='Jennifer', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Julio', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Johnson', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Jameson', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Michael', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Robert', type=UnknownType())], return_type=StringType())]:bool)), columns={'customerid': customerid}) SCAN(table=bodo.fsi.protected_customers, columns={'customerid': customerid, 'firstname': firstname, 'state': state}) diff --git a/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt b/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt index 678f7cc8e..0e96429f6 100644 --- a/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt +++ b/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('num_customers_checking_accounts', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([accounttype], 'deAccount')) != 'checking':string, columns={}) + FILTER(condition=accounttype != MASK::(PTY_PROTECT(['checking':string], 'deAccount')), columns={}) 
SCAN(table=bodo.fsi.accounts, columns={'accounttype': accounttype}) diff --git a/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt b/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt index 6629a23f5..2021c990a 100644 --- a/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt +++ b/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(UNMASK::(PTY_UNPROTECT([lastname], 'deName')), ['Barnes', 'Hernandez', 'Moore']:array[unknown]), columns={}) + FILTER(condition=ISIN(lastname, [Call(op=MASK, inputs=[Literal(value='Barnes', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Hernandez', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Moore', type=UnknownType())], return_type=StringType())]:bool), columns={}) SCAN(table=bodo.fsi.protected_customers, columns={'lastname': lastname}) diff --git a/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt b/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt index fe4f139f1..fdd20a45a 100644 --- a/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt +++ b/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(ISIN(UNMASK::(PTY_UNPROTECT([lastname], 'deName')), ['Barnes', 'Hernandez', 'Moore']:array[unknown])), columns={}) + FILTER(condition=NOT(ISIN(lastname, [Call(op=MASK, inputs=[Literal(value='Barnes', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Hernandez', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Moore', type=UnknownType())], return_type=StringType())]:bool)), columns={}) SCAN(table=bodo.fsi.protected_customers, 
columns={'lastname': lastname}) diff --git a/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt index f30c88a17..1422ebb17 100644 --- a/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) >= datetime.date(2002, 1, 1):datetime & ISIN(UNMASK::(PTY_UNPROTECT_NAME([last_name])), ['Johnson', 'Robinson']:array[unknown]), columns={}) + FILTER(condition=UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) >= datetime.date(2002, 1, 1):datetime & ISIN(last_name, [Call(op=MASK, inputs=[Literal(value='Johnson', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Robinson', type=UnknownType())], return_type=StringType())]:bool), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth, 'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt index 81ca813f2..530b3f93d 100644 --- a/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT_NAME([last_name])) != 'Smith':string & UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) == datetime.date(1979, 3, 7):datetime, columns={}) + FILTER(condition=last_name != MASK::(PTY_PROTECT(['Smith':string], 'deName')) & date_of_birth == MASK::(PTY_PROTECT([datetime.date(1979, 3, 7):datetime], 'deDOB')), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth, 
'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt index 36bd47d88..ff57eb91f 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([payment_method])) == 'Cash':string, columns={}) + FILTER(condition=payment_method == MASK::(PTY_PROTECT_ACCOUNT(['Cash':string])), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt index 39d2f23ca..89904da39 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([payment_method])) != 'Credit Card':string, columns={}) + FILTER(condition=payment_method != MASK::(PTY_PROTECT_ACCOUNT(['Credit Card':string])), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt index 17f7808d7..36ff8674c 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(UNMASK::(PTY_UNPROTECT_ACCOUNT([payment_method])), ['Cash', 'Gift Card']:array[unknown]), columns={}) + FILTER(condition=ISIN(payment_method, [Call(op=MASK, inputs=[Literal(value='Cash', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Gift Card', type=UnknownType())], return_type=StringType())]:bool), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt index f4ebc77d6..4853a4398 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(ISIN(UNMASK::(PTY_UNPROTECT_ACCOUNT([payment_method])), ['Mobile Payment', 'Gift Card']:array[unknown])), columns={}) + FILTER(condition=NOT(ISIN(payment_method, [Call(op=MASK, inputs=[Literal(value='Mobile Payment', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Gift Card', type=UnknownType())], return_type=StringType())]:bool)), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql index b369e11e8..bffd9c7c0 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - LOWER(c_lname) = 'lee' + c_lname = UPPER('lee') diff --git 
a/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql index ea6fd0c87..f1f7b1c78 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - LOWER(c_lname) <> 'lee' + c_lname <> UPPER('lee') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql index 72fc896f9..aa7550e49 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - LOWER(c_lname) IN ('lee', 'smith', 'rodriguez') + c_lname IN (UPPER('lee'), UPPER('smith'), UPPER('rodriguez')) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql index 55f030a02..6b329065c 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - NOT LOWER(c_lname) IN ('lee', 'smith', 'rodriguez') + NOT c_lname IN (UPPER('lee'), UPPER('smith'), UPPER('rodriguez')) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql index 392da3ba3..fc4234022 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') = DATE('1985-04-12') + c_birthday = DATE('1985-04-12', '-472 days') diff --git 
a/tests/test_sql_refsols/cryptbank_filter_count_11_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_11_rewrite_sqlite.sql index 6db0cea04..48071e73a 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_11_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_11_rewrite_sqlite.sql @@ -12,7 +12,7 @@ JOIN crbnk.accounts AS accounts ) AS INTEGER) END JOIN crbnk.customers AS customers - ON LOWER(customers.c_fname) = 'alice' - AND accounts.a_custkey = ( + ON accounts.a_custkey = ( 42 - customers.c_key ) + AND customers.c_fname = UPPER('alice') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_15_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_15_rewrite_sqlite.sql index 3e6a6153f..8162068dd 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_15_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_15_rewrite_sqlite.sql @@ -3,9 +3,9 @@ WITH _u_0 AS ( a_custkey AS _u_1 FROM crbnk.accounts WHERE - ( - SUBSTRING(a_type, -1) || SUBSTRING(a_type, 1, LENGTH(a_type) - 1) - ) = 'retirement' + a_type = ( + SUBSTRING('retirement', 2) || SUBSTRING('retirement', 1, 1) + ) GROUP BY 1 ) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql index a6924f0b2..41580c0f1 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql @@ -3,12 +3,12 @@ WITH _u_0 AS ( a_custkey AS _u_1 FROM crbnk.accounts WHERE - ( - SUBSTRING(a_type, -1) || SUBSTRING(a_type, 1, LENGTH(a_type) - 1) - ) <> 'checking' - AND ( - SUBSTRING(a_type, -1) || SUBSTRING(a_type, 1, LENGTH(a_type) - 1) - ) <> 'savings' + a_type <> ( + SUBSTRING('checking', 2) || SUBSTRING('checking', 1, 1) + ) + AND a_type <> ( + SUBSTRING('savings', 2) || SUBSTRING('savings', 1, 1) + ) GROUP BY 1 ) diff --git 
a/tests/test_sql_refsols/cryptbank_filter_count_24_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_24_rewrite_sqlite.sql index 94ad1441f..16e183baa 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_24_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_24_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') = DATE('1991-11-15') + c_birthday = DATE('1991-11-15', '-472 days') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql index 9f698eb20..2ef5a72af 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql @@ -2,5 +2,5 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') <> DATE('1991-11-15') - OR DATE(c_birthday, '+472 days') IS NULL + DATE(c_birthday, '+472 days') IS NULL + OR c_birthday <> DATE('1991-11-15', '-472 days') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql index 910277464..1b6629cc3 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') = '555-123-456' + c_phone = REPLACE(REPLACE(REPLACE('555-123-456', '0', '*'), '9', '0'), '*', '9') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql index 61d66382d..8e7e601b6 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql @@ -8,25 +8,21 @@ WHERE OR 
LOWER(c_fname) LIKE '%e' OR LOWER(c_fname) LIKE '%s' ) - AND ( - DATE(c_birthday, '+472 days') IS NULL OR LOWER(c_lname) <> 'lopez' - ) AND ( DATE(c_birthday, '+472 days') IS NULL OR NOT ( SUBSTRING(c_addr, -1) || SUBSTRING(c_addr, 1, LENGTH(c_addr) - 1) ) IS NULL ) + AND ( + DATE(c_birthday, '+472 days') IS NULL OR c_lname <> UPPER('lopez') + ) AND ( LOWER(c_fname) LIKE '%a' OR LOWER(c_fname) LIKE '%e' OR LOWER(c_fname) LIKE '%s' OR REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' ) - AND ( - LOWER(c_lname) <> 'lopez' - OR REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' - ) AND ( NOT ( SUBSTRING(c_addr, -1) || SUBSTRING(c_addr, 1, LENGTH(c_addr) - 1) @@ -37,3 +33,7 @@ WHERE NOT DATE(c_birthday, '+472 days') IS NULL OR REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' ) + AND ( + REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' + OR c_lname <> UPPER('lopez') + ) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql index 2b59d7a7d..0be30d552 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql @@ -14,13 +14,13 @@ JOIN crbnk.customers AS customers 42 - customers.c_key ) WHERE - ( - ( - SUBSTRING(accounts.a_type, -1) || SUBSTRING(accounts.a_type, 1, LENGTH(accounts.a_type) - 1) - ) = 'retirement' - OR ( - SUBSTRING(accounts.a_type, -1) || SUBSTRING(accounts.a_type, 1, LENGTH(accounts.a_type) - 1) - ) = 'savings' - ) - AND CAST(STRFTIME('%Y', DATETIME(accounts.a_open_ts, '+123456789 seconds')) AS INTEGER) < 2020 + CAST(STRFTIME('%Y', DATETIME(accounts.a_open_ts, '+123456789 seconds')) AS INTEGER) < 2020 AND SQRT(accounts.a_balance) >= 5000 + AND ( + accounts.a_type = ( + SUBSTRING('retirement', 2) || SUBSTRING('retirement', 1, 1) + ) + OR accounts.a_type = ( + 
SUBSTRING('savings', 2) || SUBSTRING('savings', 1, 1) + ) + ) diff --git a/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql index b59fe5aa3..fffd63196 100644 --- a/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql @@ -3,6 +3,6 @@ SELECT FROM bodo.fsi.accounts AS accounts JOIN bodo.fsi.protected_customers AS protected_customers ON PTY_UNPROTECT(protected_customers.customerid, 'deAccount') = PTY_UNPROTECT_ACCOUNT(accounts.customerid) - AND PTY_UNPROTECT(protected_customers.state, 'deAddress') = 'California' + AND protected_customers.state = PTY_PROTECT('California', 'deAddress') WHERE - PTY_UNPROTECT_ACCOUNT(accounts.currency) <> 'GBP' AND accounts.balance < 20000 + accounts.balance < 20000 AND accounts.currency <> PTY_PROTECT('GBP', 'deAccount') diff --git a/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql index a64f3585a..0e2d85f72 100644 --- a/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql @@ -2,9 +2,9 @@ SELECT COUNT(*) AS n FROM bodo.fsi.accounts AS accounts JOIN bodo.fsi.protected_customers AS protected_customers - ON NOT PTY_UNPROTECT(protected_customers.firstname, 'deName') IN ('Jennifer', 'Julio', 'Johnson', 'Jameson', 'Michael', 'Robert') + ON NOT protected_customers.firstname IN (PTY_PROTECT('Jennifer', 'deName'), PTY_PROTECT('Julio', 'deName'), PTY_PROTECT('Johnson', 'deName'), PTY_PROTECT('Jameson', 'deName'), PTY_PROTECT('Michael', 'deName'), PTY_PROTECT('Robert', 'deName')) AND PTY_UNPROTECT(protected_customers.customerid, 'deAccount') = PTY_UNPROTECT_ACCOUNT(accounts.customerid) - AND 
PTY_UNPROTECT(protected_customers.state, 'deAddress') IN ('Georgia', 'Alabama', 'Mississippi', 'Arkansas', 'Louisiana', 'Florida', 'South Carolina', 'North Carolina', 'Texas', 'Tennessee', 'Missouri') + AND protected_customers.state IN (PTY_PROTECT('Georgia', 'deAddress'), PTY_PROTECT('Alabama', 'deAddress'), PTY_PROTECT('Mississippi', 'deAddress'), PTY_PROTECT('Arkansas', 'deAddress'), PTY_PROTECT('Louisiana', 'deAddress'), PTY_PROTECT('Florida', 'deAddress'), PTY_PROTECT('South Carolina', 'deAddress'), PTY_PROTECT('North Carolina', 'deAddress'), PTY_PROTECT('Texas', 'deAddress'), PTY_PROTECT('Tennessee', 'deAddress'), PTY_PROTECT('Missouri', 'deAddress')) WHERE - PTY_UNPROTECT_ACCOUNT(accounts.currency) IN ('USD', 'GPB', 'EUR', 'JPY', 'AUD') - AND YEAR(CAST(PTY_UNPROTECT_DOB(accounts.createddate) AS TIMESTAMP)) <= 2022 + YEAR(CAST(PTY_UNPROTECT_DOB(accounts.createddate) AS TIMESTAMP)) <= 2022 + AND accounts.currency IN (PTY_PROTECT('USD', 'deAccount'), PTY_PROTECT('GPB', 'deAccount'), PTY_PROTECT('EUR', 'deAccount'), PTY_PROTECT('JPY', 'deAccount'), PTY_PROTECT('AUD', 'deAccount')) diff --git a/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql index 18d3b432e..eb9530c20 100644 --- a/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS num_customers_checking_accounts FROM bodo.fsi.accounts WHERE - PTY_UNPROTECT(accounttype, 'deAccount') <> 'checking' + accounttype <> PTY_PROTECT('checking', 'deAccount') diff --git a/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql index 8da745231..a3438c207 100644 --- a/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql +++ 
b/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.fsi.protected_customers WHERE - PTY_UNPROTECT(lastname, 'deName') IN ('Barnes', 'Hernandez', 'Moore') + lastname IN (PTY_PROTECT_NAME('Barnes'), PTY_PROTECT_NAME('Hernandez'), PTY_PROTECT_NAME('Moore')) diff --git a/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql index 3eadc83bc..3c0e5c76e 100644 --- a/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.fsi.protected_customers WHERE - NOT PTY_UNPROTECT(lastname, 'deName') IN ('Barnes', 'Hernandez', 'Moore') + NOT lastname IN (PTY_PROTECT_NAME('Barnes'), PTY_PROTECT_NAME('Hernandez'), PTY_PROTECT_NAME('Moore')) diff --git a/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql index d3db075bf..3098195a7 100644 --- a/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql @@ -3,4 +3,4 @@ SELECT FROM bodo.retail.protected_loyalty_members WHERE PTY_UNPROTECT(date_of_birth, 'deDOB') >= CAST('2002-01-01' AS DATE) - AND PTY_UNPROTECT_NAME(last_name) IN ('Johnson', 'Robinson') + AND last_name IN (PTY_PROTECT('Johnson', 'deName'), PTY_PROTECT('Robinson', 'deName')) diff --git a/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql index 87a7380a7..6733bb292 100644 --- a/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql @@ -2,5 +2,5 @@ SELECT COUNT(*) AS n FROM 
bodo.retail.protected_loyalty_members WHERE - PTY_UNPROTECT(date_of_birth, 'deDOB') = CAST('1979-03-07' AS DATE) - AND PTY_UNPROTECT_NAME(last_name) <> 'Smith' + date_of_birth = PTY_PROTECT(CAST('1979-03-07' AS DATE), 'deDOB') + AND last_name <> PTY_PROTECT('Smith', 'deName') diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql index 826832c55..caea10607 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - PTY_UNPROTECT_ACCOUNT(payment_method) = 'Cash' + payment_method = PTY_PROTECT_ACCOUNT('Cash') diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql index 64ac0c6b7..7277069b6 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - PTY_UNPROTECT_ACCOUNT(payment_method) <> 'Credit Card' + payment_method <> PTY_PROTECT_ACCOUNT('Credit Card') diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql index 2cf6554da..f65b42330 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - PTY_UNPROTECT_ACCOUNT(payment_method) IN ('Cash', 'Gift Card') + 
payment_method IN (PTY_PROTECT_ACCOUNT('Cash'), PTY_PROTECT_ACCOUNT('Gift Card')) diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql index 6d863d06d..73128145f 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - NOT PTY_UNPROTECT_ACCOUNT(payment_method) IN ('Mobile Payment', 'Gift Card') + NOT payment_method IN (PTY_PROTECT_ACCOUNT('Mobile Payment'), PTY_PROTECT_ACCOUNT('Gift Card'))