Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
83aaa52
Initial setup started
knassre-bodo Aug 21, 2025
3930b3d
Added all metadata except the protect/unprotect protocols for CRYPTBANK
knassre-bodo Aug 21, 2025
4a02e2a
Added basic tests before inclusion of encryption
knassre-bodo Aug 22, 2025
32e678e
Added more test files
knassre-bodo Aug 22, 2025
957f010
Renamed tests
knassre-bodo Aug 22, 2025
2596a00
Added new tests [RUN CI]
knassre-bodo Aug 22, 2025
f2e46c4
Re-enabling encryption of CRYPTBANK data and skipping e2e tests until…
knassre-bodo Aug 22, 2025
86036b7
Adding more tests [RUN CI]
knassre-bodo Aug 25, 2025
f896f14
[RUN CI]
knassre-bodo Aug 25, 2025
c1315e3
Added initial relational setup with operator for unmasking
knassre-bodo Aug 26, 2025
48b892b
Fixing naming bug
knassre-bodo Aug 26, 2025
040d725
Added cryptbank SQL support with encryptions injected
knassre-bodo Aug 26, 2025
16de5a3
[RUN CI]
knassre-bodo Aug 26, 2025
5241003
Fixing JSON file [RUN CI]
knassre-bodo Aug 26, 2025
2a27c99
Merge branch 'kian/sqlite_masked_tests' into kian/masked_relational_r…
knassre-bodo Aug 26, 2025
c88d2f0
Resolving conflicts
knassre-bodo Aug 27, 2025
442621e
Merge branch 'kian/sqlite_masked_tests' into kian/masked_relational_r…
knassre-bodo Aug 27, 2025
e5b8ab8
Resolving conflicts [RUN CI]
knassre-bodo Sep 8, 2025
ffbe3fe
Merge branch 'main' into kian/masked_relational_rewrite
knassre-bodo Sep 8, 2025
d352175
add rest
hadia206 Sep 8, 2025
aa2ee68
sf_masked_examples.json
hadia206 Sep 8, 2025
f09d0e7
Revisions [RUN CI]
knassre-bodo Sep 9, 2025
76a16c2
Merge branch 'main' into kian/masked_relational_rewrite
knassre-bodo Sep 10, 2025
be00e58
Merge branch 'main' into kian/masked_relational_rewrite
knassre-bodo Sep 15, 2025
fa2d869
[RUN CI]
knassre-bodo Sep 15, 2025
2189fb3
Merge branch 'main' into kian/masked_relational_rewrite
knassre-bodo Sep 17, 2025
0f6e59f
Adding environment variable
knassre-bodo Sep 17, 2025
98a9c4c
[RUN CI]
knassre-bodo Sep 17, 2025
bf2b075
add sql and relational files and tests
hadia206 Sep 19, 2025
a883759
use other version in some metadata and skip tests
hadia206 Sep 19, 2025
5d273c3
add import deleted by ruff
hadia206 Sep 19, 2025
2d69928
merge
hadia206 Sep 22, 2025
bc09e3f
Github action
hadia206 Sep 22, 2025
ab08ce4
Merge branch 'main' into kian/masked_relational_rewrite
knassre-bodo Sep 24, 2025
a36fb2b
[run CI] address comments (remove test and add type hints)
hadia206 Sep 24, 2025
df477e7
Revisions
knassre-bodo Sep 24, 2025
cccbe19
[RUN CI]
knassre-bodo Sep 24, 2025
600492a
Merge remote-tracking branch 'origin/Hadia/sf_masked_tests' into kian…
knassre-bodo Sep 25, 2025
82e9691
Resolving conflicts, adding raw vs rewrite
knassre-bodo Sep 25, 2025
2a24514
Adding raw vs rewrite
knassre-bodo Sep 25, 2025
aea501f
Fixing SQL handling and fixtures
knassre-bodo Sep 25, 2025
a733022
Resolving conflicts
knassre-bodo Sep 25, 2025
a5c3b9c
WIP
knassre-bodo Sep 25, 2025
bdde458
Adding more tests
knassre-bodo Sep 29, 2025
4a58775
Adding more tests
knassre-bodo Sep 29, 2025
66f2193
Resolving test updates
knassre-bodo Sep 29, 2025
630c7cc
Adding more tests
knassre-bodo Sep 29, 2025
f4c318f
Resolving conflicts [RUN ALL]
knassre-bodo Sep 29, 2025
478b2f6
[RUN CI] [RUN SF_MASKED]
knassre-bodo Sep 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion pydough/conversion/relational_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pydough.metadata import (
CartesianProductMetadata,
GeneralJoinMetadata,
MaskedTableColumnMetadata,
SimpleJoinMetadata,
SimpleTableMetadata,
)
Expand Down Expand Up @@ -803,7 +804,33 @@ def build_simple_table_scan(
assert isinstance(expr, ColumnReference)
real_names.add(expr.name)
uniqueness.add(frozenset(real_names))
answer = Scan(node.collection.collection.table_path, scan_columns, uniqueness)
answer: RelationalNode = Scan(
node.collection.collection.table_path, scan_columns, uniqueness
)

# If any of the columns are masked, insert a projection on top to unmask
# them.
if any(
isinstance(expr, HybridColumnExpr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would create a helper function for code readability:

def _is_a_masked_column(expr):
    return isinstance(expr, HybridColumnExpr) 
        and isinstance(expr.column.column_property, MaskedTableColumnMetadata)

Then:

if any (
    _is_a_masked_column(expr) 
    for expr in node.terms.values()
):

and isinstance(expr.column.column_property, MaskedTableColumnMetadata)
for expr in node.terms.values()
):
unmask_columns: dict[str, RelationalExpression] = {}
for name, hybrid_expr in node.terms.items():
if isinstance(hybrid_expr, HybridColumnExpr) and isinstance(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use the helper function here: if _is_a_masked_column(hybrid_expr):

hybrid_expr.column.column_property, MaskedTableColumnMetadata
):
unmask_columns[name] = CallExpression(
pydop.MaskedExpressionFunctionOperator(
hybrid_expr.column.column_property, True
),
hybrid_expr.column.column_property.unprotected_data_type,
[ColumnReference(name, hybrid_expr.typ)],
)
else:
unmask_columns[name] = ColumnReference(name, hybrid_expr.typ)
answer = Project(answer, unmask_columns)

return TranslationOutput(answer, out_columns)

def translate_sub_collection(
Expand Down
2 changes: 2 additions & 0 deletions pydough/pydough_operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"MONOTONIC",
"MONTH",
"MUL",
"MaskedExpressionFunctionOperator",
"NDISTINCT",
"NEQ",
"NEXT",
Expand Down Expand Up @@ -207,6 +208,7 @@
ExpressionFunctionOperator,
ExpressionWindowOperator,
KeywordBranchingExpressionFunctionOperator,
MaskedExpressionFunctionOperator,
PyDoughExpressionOperator,
SqlAliasExpressionFunctionOperator,
SqlMacroExpressionFunctionOperator,
Expand Down
2 changes: 2 additions & 0 deletions pydough/pydough_operators/expression_operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"MONOTONIC",
"MONTH",
"MUL",
"MaskedExpressionFunctionOperator",
"NDISTINCT",
"NEQ",
"NEXT",
Expand Down Expand Up @@ -107,6 +108,7 @@
from .expression_operator import PyDoughExpressionOperator
from .expression_window_operators import ExpressionWindowOperator
from .keyword_branching_operators import KeywordBranchingExpressionFunctionOperator
from .masked_expression_function_operator import MaskedExpressionFunctionOperator
from .registered_expression_operators import (
ABS,
ABSENT,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
Special operators containing logic to mask or unmask data based on a masked
table column's metadata.
"""

__all__ = ["MaskedExpressionFunctionOperator"]


from pydough.metadata.properties import MaskedTableColumnMetadata
from pydough.pydough_operators.type_inference import (
ConstantType,
ExpressionTypeDeducer,
RequireNumArgs,
TypeVerifier,
)
from pydough.types import PyDoughType

from .expression_function_operators import ExpressionFunctionOperator


class MaskedExpressionFunctionOperator(ExpressionFunctionOperator):
"""
A special expression function operator that masks or unmasks data based on
a masked table column's metadata. The operator contains the metadata for
the column, but can represent either a masking or unmasking operation
depending on the `is_unprotect` flag.
"""

def __init__(
self,
masking_metadata: MaskedTableColumnMetadata,
is_unprotect: bool,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We store the object with the metadata & this boolean so all we need to do for #418 to create a MASK call is create a copy of the operator with is_unprotect toggled to False.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why named is_unprotect, for consistency is_unmasked makes more sense since class is called Masked...?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, but I will rename it to is_unmask since it describes whether the function masks or unmasks.

):
# Create a dummy verifier that requires exactly one argument, since all
# masking/unmasking operations are unary.
verifier: TypeVerifier = RequireNumArgs(1)
Comment on lines +34 to +36
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that a guarantee for all different use cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, since from a PyDough perspective the operator is always invoked in the form UNMASK(arg) or MASK(arg). Internally, when the macro gets expanded, it may contain multiple arguments, but it is only parameterized on one argument (the thing getting masked/unmasked).


# Create a dummy deducer that always returns the appropriate data type
# from the metadata based on whether this is a masking or unmasking
# operation.
target_type: PyDoughType = (
masking_metadata.unprotected_data_type
if is_unprotect
else masking_metadata.data_type
)
deducer: ExpressionTypeDeducer = ConstantType(target_type)

super().__init__(
"UNMASK" if is_unprotect else "MASK", False, verifier, deducer, False
)
self._masking_metadata: MaskedTableColumnMetadata = masking_metadata
self._is_unprotect: bool = is_unprotect

@property
def masking_metadata(self) -> MaskedTableColumnMetadata:
"""
The metadata for the masked column.
"""
return self._masking_metadata

@property
def is_unprotect(self) -> bool:
"""
Whether this operator is unprotecting (True) or protecting (False).
"""
return self._is_unprotect

@property
def format_string(self) -> str:
"""
The format string to use for this operator to either mask or unmask the
operand.
"""
return (
self.masking_metadata.unprotect_protocol
if self.is_unprotect
else self.masking_metadata.protect_protocol
Comment on lines 75 to 77
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same. For consistency, we should rename those to be unmask_protocol and mask_protocol

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those are in the metadata, and are named to be consistent with the JSON fields protect protocol and unprotect protocol. If we want to change those, that's now an API spec change to something already merged. We can, but that's a separate discussion from this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with both of you. That is a separate discussion from this PR but I think we should consider changing protect/unprotect references to mask/unmask.

)
Comment on lines 70 to 78
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This switch logic makes the SQL conversion step seamless


def to_string(self, arg_strings: list[str]) -> str:
    """
    Render this operator as a string for relational plans. The output is
    prefixed with `UNMASK::` or `MASK::` (depending on `is_unprotect`) and
    contains the operator's format string with every argument injected
    inside square brackets, so the injected operands are visible within
    the surrounding masking/unmasking logic.
    """
    if self.is_unprotect:
        prefix: str = "UNMASK"
    else:
        prefix = "MASK"
    # Bracket each operand before injecting it into the format string.
    bracketed = (f"[{arg}]" for arg in arg_strings)
    expanded: str = self.format_string.format(*bracketed)
    return f"{prefix}::({expanded})"
Comment on lines 80 to 83
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is mostly for relational plans, so we can clearly delineate the mask/unmask call, the logic within, and where the arguments get injected. For example, if I am unmasking the expression foo - 7, and the logic to unmask x is (2 * x) - 1, then this gets stringified as UNMASK::((2 * [foo - 7]) - 1)


def equals(self, other: object) -> bool:
return (
isinstance(other, MaskedExpressionFunctionOperator)
and self.masking_metadata == other.masking_metadata
and self.is_unprotect == other.is_unprotect
and super().equals(other)
)
18 changes: 15 additions & 3 deletions pydough/sqlglot/transform_bindings/base_transform_bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,24 @@ def convert_call_to_sqlglot(
return sqlglot_expressions.Anonymous(
this=operator.sql_function_alias, expressions=args
)
if isinstance(operator, pydop.SqlMacroExpressionFunctionOperator):
if isinstance(
operator,
(
pydop.MaskedExpressionFunctionOperator,
pydop.SqlMacroExpressionFunctionOperator,
),
):
Comment on lines +175 to +181
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just extending the logic we already use for UDFs with macro text, but now with the new MaskedExpressionFunctionOperator operator, which contains a format string. This format string is either the unprotect or protect format string, depending on whether is_unprotect is True/False in the operator.

# For user defined operators that are a macro for SQL text, convert
# the arguments to SQL text strings then inject them into the macro
# as a format string, then re-parse it.
# as a format string, then re-parse it. The same idea works for the
# masking/unmasking operators
arg_strings: list[str] = [arg.sql() for arg in args]
combined_string: str = operator.macro_text.format(*arg_strings)
fmt_string: str
if isinstance(operator, pydop.MaskedExpressionFunctionOperator):
fmt_string = operator.format_string
else:
fmt_string = operator.macro_text
combined_string: str = fmt_string.format(*arg_strings)
return parse_one(combined_string)
match operator:
case pydop.NOT:
Expand Down
18 changes: 18 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,24 @@ def sqlite_technograph_connection() -> DatabaseContext:
return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE)


@pytest.fixture(
    params=[
        pytest.param(("0", "raw"), id="raw"),
        pytest.param(("1", "rewrite"), id="rewrite"),
    ]
)
def enable_mask_rewrites(request):
    """
    Temporarily set the `PYDOUGH_ENABLE_MASK_REWRITES` environment variable
    to the parameterized value, yielding the name that should be used to
    identify the situation ("raw" for disabled, "rewrite" for enabled).

    After the test, the environment is put back exactly as it was found: a
    previously set value is restored, and a previously unset variable is
    removed again rather than being left behind as "0".
    """
    env_var: str = "PYDOUGH_ENABLE_MASK_REWRITES"
    flag_value, label = request.param
    # Use None (not "0") as the sentinel so "unset" can be told apart from
    # an explicit "0" when restoring.
    old_value: str | None = os.environ.get(env_var)
    os.environ[env_var] = flag_value
    yield label
    if old_value is None:
        os.environ.pop(env_var, None)
    else:
        os.environ[env_var] = old_value
Comment on lines 590 to 593
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FOR NOW this environment variable has no effect, but it will in #418. We use it to clearly separate the difference in executing with vs without it, and so we can verify everything still runs correctly in both versions.

This means that, for the time being, the _raw vs _rewrite files are the same.



@pytest.fixture(scope="session")
def sqlite_cryptbank_connection() -> DatabaseContext:
"""
Expand Down
41 changes: 20 additions & 21 deletions tests/gen_data/init_cryptbank.sql
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ CREATE TABLE TRANSACTIONS (
);

INSERT INTO CUSTOMERS (c_key, c_fname, c_lname, c_phone, c_email, c_addr, c_birthday)
SELECT *
-- 42 - column1, -- ARITHMETIC SHIFT: 42
-- UPPER(column2), -- UPPERCASE
-- UPPER(column3), -- UPPERCASE
-- REPLACE(REPLACE(REPLACE(column4, '0', '*'), '9', '0'), '*', '9'), -- DIGIT SWITCH: 0 <-> 9
-- SUBSTRING(column5, 2) || SUBSTRING(column5, 1, 1), -- FIRST CHAR TRANSPOSE
-- SUBSTRING(column6, 2) || SUBSTRING(column6, 1, 1), -- FIRST CHAR TRANSPOSE
-- DATE(column7, '-472 days') -- DAY SHIFT: 472
SELECT
42 - column1, -- ARITHMETIC SHIFT: 42
UPPER(column2), -- UPPERCASE
UPPER(column3), -- UPPERCASE
REPLACE(REPLACE(REPLACE(column4, '0', '*'), '9', '0'), '*', '9'), -- DIGIT SWITCH: 0 <-> 9
SUBSTRING(column5, 2) || SUBSTRING(column5, 1, 1), -- FIRST CHAR TRANSPOSE
SUBSTRING(column6, 2) || SUBSTRING(column6, 1, 1), -- FIRST CHAR TRANSPOSE
DATE(column7, '-472 days') -- DAY SHIFT: 472
Comment on lines -40 to +47
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to restore encryption in the original PR, but it's fine because the E2E tests were being skipped until now

FROM (
VALUES
(1, 'alice', 'johnson', '555-123-4567', '[email protected]', '123 Maple St;Portland;OR;97205', '1985-04-12'),
Expand Down Expand Up @@ -81,13 +81,13 @@ INSERT INTO BRANCHES (b_key, b_name, b_addr) VALUES
;

INSERT INTO ACCOUNTS (a_key, a_custkey, a_branchkey, a_balance, a_type, a_open_ts)
SELECT *
-- CAST(CAST(column1 as TEXT) || CAST(column1 as TEXT) AS INTEGER),
-- column2,
-- column3,
-- column4 * column4, -- GEOMETRIC SHIFT
-- SUBSTRING(column5, 2) || SUBSTRING(column5, 1, 1), -- FIRST CHAR TRANSPOSE
-- DATETIME(column6, '-123456789 seconds') -- SECOND SHIFT: 123456789
SELECT
CAST(CAST(column1 as TEXT) || CAST(column1 as TEXT) AS INTEGER),
column2,
column3,
column4 * column4, -- GEOMETRIC SHIFT
SUBSTRING(column5, 2) || SUBSTRING(column5, 1, 1), -- FIRST CHAR TRANSPOSE
DATETIME(column6, '-123456789 seconds') -- SECOND SHIFT: 123456789
FROM (
VALUES
-- Customer 1 (alice johnson, OR) - 3 accounts
Expand Down Expand Up @@ -189,12 +189,11 @@ VALUES

INSERT INTO TRANSACTIONS (t_key, t_sourceaccount, t_destaccount, t_amount, t_ts)
SELECT
*
-- column1,
-- column2,
-- column3,
-- 1025.67 - column4, -- ARITHMETIC SHIFT: 1025.67
-- DATETIME(column5, '-54321 seconds') -- SECOND SHIFT: 54321
column1,
column2,
column3,
1025.67 - column4, -- ARITHMETIC SHIFT: 1025.67
DATETIME(column5, '-54321 seconds') -- SECOND SHIFT: 54321
FROM (
VALUES
(1, 41, 8, 2753.92, '2019-11-11 18:00:52'),
Expand Down
13 changes: 8 additions & 5 deletions tests/test_masked_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,12 +624,15 @@ def test_pipeline_until_relational_cryptbank(
masked_graphs: graph_fetcher,
get_plan_test_filename: Callable[[str], str],
update_tests: bool,
enable_mask_rewrites: str,
) -> None:
"""
Tests the conversion of the PyDough queries on the custom cryptbank dataset
into relational plans.
"""
file_path: str = get_plan_test_filename(cryptbank_pipeline_test_data.test_name)
file_path: str = get_plan_test_filename(
f"{cryptbank_pipeline_test_data.test_name}_{enable_mask_rewrites}"
)
cryptbank_pipeline_test_data.run_relational_test(
masked_graphs, file_path, update_tests
)
Expand All @@ -641,13 +644,15 @@ def test_pipeline_until_sql_cryptbank(
sqlite_tpch_db_context: DatabaseContext,
get_sql_test_filename: Callable[[str, DatabaseDialect], str],
update_tests: bool,
enable_mask_rewrites: str,
):
"""
Tests the conversion of the PyDough queries on the custom cryptbank dataset
into SQL text.
"""
file_path: str = get_sql_test_filename(
cryptbank_pipeline_test_data.test_name, sqlite_tpch_db_context.dialect
f"{cryptbank_pipeline_test_data.test_name}_{enable_mask_rewrites}",
sqlite_tpch_db_context.dialect,
)
cryptbank_pipeline_test_data.run_sql_test(
masked_graphs,
Expand All @@ -657,14 +662,12 @@ def test_pipeline_until_sql_cryptbank(
)


# @pytest.mark.skip(
# reason="Skipping until masked table column relational handling is implemented"
# )
Comment on lines -660 to -662
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to restore the skip marker previously, but this was fine because the data wasn't being encrypted before. Now we are making it so it is encrypted (see init_cryptbank.sql), but the test still passes because of the changes.

@pytest.mark.execute
def test_pipeline_e2e_cryptbank(
cryptbank_pipeline_test_data: PyDoughPandasTest,
masked_graphs: graph_fetcher,
sqlite_cryptbank_connection: DatabaseContext,
enable_mask_rewrites: str,
):
"""
Test executing the the custom queries with the custom cryptbank dataset
Expand Down
4 changes: 0 additions & 4 deletions tests/test_plan_refsols/cryptbank_agg_01.txt

This file was deleted.

4 changes: 4 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_01_raw.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ROOT(columns=[('n', ROUND(avg_unmask_t_amount, 2:numeric))], orderings=[])
AGGREGATE(keys={}, aggregations={'avg_unmask_t_amount': AVG(UNMASK::((1025.67 - ([t_amount]))))})
FILTER(condition=MONTH(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 6:numeric & YEAR(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 2022:numeric, columns={'t_amount': t_amount})
SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_ts': t_ts})
4 changes: 4 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_01_rewrite.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ROOT(columns=[('n', ROUND(avg_unmask_t_amount, 2:numeric))], orderings=[])
AGGREGATE(keys={}, aggregations={'avg_unmask_t_amount': AVG(UNMASK::((1025.67 - ([t_amount]))))})
FILTER(condition=MONTH(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 6:numeric & YEAR(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 2022:numeric, columns={'t_amount': t_amount})
SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_ts': t_ts})
3 changes: 0 additions & 3 deletions tests/test_plan_refsols/cryptbank_agg_02.txt

This file was deleted.

3 changes: 3 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_02_raw.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ROOT(columns=[('account_type', unmask_a_type), ('n', n_rows), ('avg_bal', ROUND(avg_unmask_a_balance, 2:numeric))], orderings=[])
AGGREGATE(keys={'unmask_a_type': UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))}, aggregations={'avg_unmask_a_balance': AVG(UNMASK::(SQRT([a_balance]))), 'n_rows': COUNT()})
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_type': a_type})
3 changes: 3 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_02_rewrite.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ROOT(columns=[('account_type', unmask_a_type), ('n', n_rows), ('avg_bal', ROUND(avg_unmask_a_balance, 2:numeric))], orderings=[])
AGGREGATE(keys={'unmask_a_type': UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))}, aggregations={'avg_unmask_a_balance': AVG(UNMASK::(SQRT([a_balance]))), 'n_rows': COUNT()})
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_type': a_type})
5 changes: 0 additions & 5 deletions tests/test_plan_refsols/cryptbank_agg_03.txt

This file was deleted.

5 changes: 5 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_03_raw.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('balance', UNMASK::(SQRT([a_balance]))), ('name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))))], orderings=[])
FILTER(condition=RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname})
JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_ACCESS, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname})
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type})
SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname})
5 changes: 5 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_03_rewrite.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('balance', UNMASK::(SQRT([a_balance]))), ('name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))))], orderings=[])
FILTER(condition=RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname})
JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_ACCESS, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname})
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type})
SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname})
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[])
JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_a_balance': t1.sum_a_balance})
ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[])
JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_unmask_a_balance': t1.sum_unmask_a_balance})
SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key})
AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_a_balance': SUM(a_balance)})
AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))})
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey})
5 changes: 5 additions & 0 deletions tests/test_plan_refsols/cryptbank_agg_04_rewrite.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ROOT(columns=[('branch_key', b_key), ('pct_total_wealth', ROUND(DEFAULT_TO(sum_unmask_a_balance, 0:numeric) / RELSUM(args=[DEFAULT_TO(sum_unmask_a_balance, 0:numeric)], partition=[], order=[]), 2:numeric))], orderings=[])
JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=SINGULAR_ACCESS, columns={'b_key': t0.b_key, 'sum_unmask_a_balance': t1.sum_unmask_a_balance})
SCAN(table=CRBNK.BRANCHES, columns={'b_key': b_key})
AGGREGATE(keys={'a_branchkey': a_branchkey}, aggregations={'sum_unmask_a_balance': SUM(UNMASK::(SQRT([a_balance])))})
SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_branchkey': a_branchkey})
6 changes: 0 additions & 6 deletions tests/test_plan_refsols/cryptbank_agg_05.txt

This file was deleted.

Loading