Skip to content

Commit f41c108

Browse files
Add support for joining tables in anonymizing queries.
1 parent 6711291 commit f41c108

File tree

5 files changed

+140
-43
lines changed

5 files changed

+140
-43
lines changed

src/query/validation.c

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
} while (0)
3030

3131
static void verify_where(Query *query);
32-
static void verify_rtable(Query *query);
32+
static void verify_select_targets(Query *query);
3333
static void verify_aggregators(Query *query);
3434
static void verify_non_system_column(Var *var);
3535
static bool option_matches(DefElem *option, char *name, bool value);
@@ -106,24 +106,64 @@ void verify_anonymization_requirements(Query *query)
106106

107107
verify_where(query);
108108
verify_aggregators(query);
109-
verify_rtable(query);
109+
verify_select_targets(query);
110110
}
111111

112-
static void verify_rtable(Query *query)
112+
static void verify_select_targets(Query *query)
113113
{
114-
NOT_SUPPORTED(list_length(query->rtable) > 1, "JOINs in anonymizing queries");
115-
116114
ListCell *cell = NULL;
115+
117116
foreach (cell, query->rtable)
118117
{
119118
RangeTblEntry *range_table = lfirst_node(RangeTblEntry, cell);
120-
NOT_SUPPORTED(range_table->rtekind == RTE_SUBQUERY, "Subqueries in anonymizing queries");
121-
NOT_SUPPORTED(range_table->rtekind == RTE_JOIN, "JOINs in anonymizing queries");
122119

123-
if (range_table->rtekind == RTE_RELATION)
124-
NOT_SUPPORTED(has_subclass(range_table->relid) || has_superclass(range_table->relid), "Inheritance in anonymizing queries.");
125-
else
126-
FAILWITH("Unsupported FROM clause.");
120+
switch (range_table->rtekind)
121+
{
122+
case RTE_RELATION:
123+
NOT_SUPPORTED(has_subclass(range_table->relid) || has_superclass(range_table->relid), "Inheritance in anonymizing queries");
124+
break;
125+
126+
case RTE_JOIN:
127+
NOT_SUPPORTED(range_table->jointype == JOIN_SEMI || range_table->jointype == JOIN_ANTI, "SEMI JOIN in anonymizing queries");
128+
break;
129+
130+
case RTE_SUBQUERY:
131+
FAILWITH_CODE(ERRCODE_FEATURE_NOT_SUPPORTED, "Subqueries in anonymizing queries are not supported.");
132+
break;
133+
134+
default:
135+
FAILWITH_CODE(ERRCODE_FEATURE_NOT_SUPPORTED, "Unsupported FROM clause.");
136+
break;
137+
}
138+
}
139+
140+
foreach (cell, query->jointree->fromlist)
141+
{
142+
if (list_length(query->jointree->fromlist) == 1 && IsA(lfirst(cell), RangeTblRef))
143+
break;
144+
145+
NOT_SUPPORTED(IsA(lfirst(cell), RangeTblRef) || IsA(lfirst(cell), FromExpr), "CROSS JOIN in anonymizing queries");
146+
147+
Assert(IsA(lfirst(cell), JoinExpr));
148+
JoinExpr *join_expr = lfirst_node(JoinExpr, cell);
149+
150+
List *subjects = NIL, *targets = NIL;
151+
collect_equalities_from_filters(join_expr->quals, &subjects, &targets);
152+
153+
ListCell *subject_cell = NULL, *target_cell = NULL;
154+
forboth(subject_cell, subjects, target_cell, targets)
155+
{
156+
Node *subject_expression = unwrap_cast(lfirst(subject_cell));
157+
Node *target_expression = unwrap_cast(lfirst(target_cell));
158+
159+
if (!IsA(subject_expression, Var))
160+
FAILWITH_CODE(ERRCODE_FEATURE_NOT_SUPPORTED, "Left side of equality in pre-anonymization JOIN filter has to be a simple column reference.");
161+
if (!IsA(target_expression, Var))
162+
FAILWITH_CODE(ERRCODE_FEATURE_NOT_SUPPORTED, "Right side of equality in pre-anonymization JOIN filter has to be a simple column reference.");
163+
}
164+
165+
list_free(subjects);
166+
list_free(targets);
127167
}
128168
}
129169

@@ -422,7 +462,7 @@ void collect_equalities_from_filters(Node *node, List **subjects, List **targets
422462
}
423463
}
424464

425-
FAILWITH("Only equalities between generalization expressions and constants are allowed as pre-anonymization filters.");
465+
FAILWITH("Only equalities are allowed in pre-anonymization filters.");
426466
}
427467

428468
static Var *get_bucket_expression_column_ref(Node *bucket_expression)

test/expected/noisy.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,3 +303,15 @@ UNION
303303
7
304304
(1 row)
305305

306+
----------------------------------------------------------------
307+
-- JOIN queries
308+
----------------------------------------------------------------
309+
-- JOIN order doesn't affect results
310+
(SELECT COUNT(*) FROM test_customers AS c JOIN test_purchases ON c.id = cid WHERE city = 'Berlin')
311+
UNION
312+
(SELECT COUNT(*) FROM test_purchases JOIN test_customers AS c ON cid = c.id WHERE city = 'Berlin');
313+
count
314+
-------
315+
44
316+
(1 row)
317+

test/expected/validation.out

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,41 @@ EXECUTE prepared(1.0);
327327
----------+-------
328328
(0 rows)
329329

330+
-- Allow anonymizing JOINs
331+
SELECT COUNT(*) FROM test_validation AS c
332+
INNER JOIN test_purchases ON c.id = cid;
333+
count
334+
-------
335+
0
336+
(1 row)
337+
338+
SELECT COUNT(*) FROM test_validation AS t1
339+
INNER JOIN test_validation AS t2 ON t1.name = t2.name AND t1.city = t2.city;
340+
count
341+
-------
342+
0
343+
(1 row)
344+
345+
SELECT COUNT(c.city), COUNT(p.name) FROM test_validation AS c
346+
LEFT JOIN test_purchases ON c.id = cid
347+
LEFT JOIN test_products AS p ON pid = p.id;
348+
count | count
349+
-------+-------
350+
0 | 0
351+
(1 row)
352+
353+
SELECT COUNT(*) FROM test_validation NATURAL JOIN test_patients;
354+
count
355+
-------
356+
0
357+
(1 row)
358+
359+
SELECT COUNT(*) FROM test_validation JOIN test_patients USING (name);
360+
count
361+
-------
362+
0
363+
(1 row)
364+
330365
----------------------------------------------------------------
331366
-- Unsupported queries
332367
----------------------------------------------------------------
@@ -477,25 +512,20 @@ SELECT GREATEST(discount, 20) FROM test_validation;
477512
ERROR: [PG_DIFFIX] Unsupported generalization expression.
478513
SELECT LEAST(discount, 20) FROM test_validation;
479514
ERROR: [PG_DIFFIX] Unsupported generalization expression.
480-
-- Get rejected because of JOINs
481-
SELECT COUNT(*), COUNT(DISTINCT id), COUNT(DISTINCT cid) FROM test_validation
482-
INNER JOIN test_purchases tp ON id = cid;
483-
ERROR: [PG_DIFFIX] Feature 'JOINs in anonymizing queries' is not currently supported.
484-
SELECT COUNT(c.city), COUNT(p.name) FROM test_validation c
485-
LEFT JOIN test_purchases ON c.id = cid
486-
LEFT JOIN test_products p ON pid = p.id;
487-
ERROR: [PG_DIFFIX] Feature 'JOINs in anonymizing queries' is not currently supported.
488-
SELECT city, COUNT(price) FROM test_validation, test_products GROUP BY 1;
489-
ERROR: [PG_DIFFIX] Feature 'JOINs in anonymizing queries' is not currently supported.
490-
SELECT city, COUNT(price) FROM test_products, test_validation GROUP BY 1;
491-
ERROR: [PG_DIFFIX] Feature 'JOINs in anonymizing queries' is not currently supported.
492-
SELECT city, COUNT(price) FROM test_products CROSS JOIN test_validation GROUP BY 1;
493-
ERROR: [PG_DIFFIX] Feature 'JOINs in anonymizing queries' is not currently supported.
515+
-- Get rejected because of invalid JOINs
516+
SELECT COUNT(*) FROM test_validation JOIN test_purchases ON id != cid;
517+
ERROR: [PG_DIFFIX] Only equalities are allowed in pre-anonymization filters.
518+
SELECT COUNT(*) FROM test_validation JOIN test_purchases ON id = cid OR cid = id;
519+
ERROR: [PG_DIFFIX] Only equalities are allowed in pre-anonymization filters.
520+
SELECT COUNT(*) FROM test_validation JOIN test_purchases ON true;
521+
ERROR: [PG_DIFFIX] Only equalities are allowed in pre-anonymization filters.
522+
SELECT COUNT(*) FROM test_validation, test_purchases;
523+
ERROR: [PG_DIFFIX] Feature 'CROSS JOIN in anonymizing queries' is not currently supported.
494524
-- Get rejected because of invalid WHERE clauses
495525
SELECT COUNT(*) FROM test_validation WHERE city <> 'London';
496-
ERROR: [PG_DIFFIX] Only equalities between generalization expressions and constants are allowed as pre-anonymization filters.
526+
ERROR: [PG_DIFFIX] Only equalities are allowed in pre-anonymization filters.
497527
SELECT COUNT(*) FROM test_validation WHERE city = 'London' OR discount = 10;
498-
ERROR: [PG_DIFFIX] Only equalities between generalization expressions and constants are allowed as pre-anonymization filters.
528+
ERROR: [PG_DIFFIX] Only equalities are allowed in pre-anonymization filters.
499529
SELECT COUNT(*) FROM test_validation WHERE diffix.round_by(id, 5) = 0;
500530
ERROR: [PG_DIFFIX] AID columns can't be referenced by pre-anonymization filters.
501531
LINE 1: ...UNT(*) FROM test_validation WHERE diffix.round_by(id, 5) = 0...
@@ -563,9 +593,9 @@ SELECT * FROM pg_stat_activity LIMIT 10;
563593
ERROR: permission denied for schema pg_catalog
564594
-- Get rejected because of inheritance
565595
SELECT x, y FROM subclass;
566-
ERROR: [PG_DIFFIX] Feature 'Inheritance in anonymizing queries.' is not currently supported.
596+
ERROR: [PG_DIFFIX] Feature 'Inheritance in anonymizing queries' is not currently supported.
567597
SELECT x FROM superclass;
568-
ERROR: [PG_DIFFIX] Feature 'Inheritance in anonymizing queries.' is not currently supported.
598+
ERROR: [PG_DIFFIX] Feature 'Inheritance in anonymizing queries' is not currently supported.
569599
-- Get rejected because attempt to use system columns
570600
SELECT ctid FROM test_validation;
571601
ERROR: [PG_DIFFIX] System columns are not allowed in this context.

test/sql/noisy.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,12 @@ UNION
110110
(SELECT count(*) FROM test_customers WHERE diffix.round_by(discount, 2) = 0 GROUP BY city HAVING city = 'Berlin')
111111
UNION
112112
(SELECT count(*) FROM test_customers WHERE city = 'Berlin' AND diffix.round_by(discount, 2) = 0);
113+
114+
----------------------------------------------------------------
115+
-- JOIN queries
116+
----------------------------------------------------------------
117+
118+
-- JOIN order doesn't affect results
119+
(SELECT COUNT(*) FROM test_customers AS c JOIN test_purchases ON c.id = cid WHERE city = 'Berlin')
120+
UNION
121+
(SELECT COUNT(*) FROM test_purchases JOIN test_customers AS c ON cid = c.id WHERE city = 'Berlin');

test/sql/validation.sql

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,20 @@ SELECT * FROM diffix.show_labels() WHERE objname LIKE 'public.empty_test_custome
172172
PREPARE prepared(float) AS SELECT discount, count(*) FROM empty_test_customers WHERE discount = $1 GROUP BY 1;
173173
EXECUTE prepared(1.0);
174174

175+
-- Allow anonymizing JOINs
176+
SELECT COUNT(*) FROM test_validation AS c
177+
INNER JOIN test_purchases ON c.id = cid;
178+
179+
SELECT COUNT(*) FROM test_validation AS t1
180+
INNER JOIN test_validation AS t2 ON t1.name = t2.name AND t1.city = t2.city;
181+
182+
SELECT COUNT(c.city), COUNT(p.name) FROM test_validation AS c
183+
LEFT JOIN test_purchases ON c.id = cid
184+
LEFT JOIN test_products AS p ON pid = p.id;
185+
186+
SELECT COUNT(*) FROM test_validation NATURAL JOIN test_patients;
187+
SELECT COUNT(*) FROM test_validation JOIN test_patients USING (name);
188+
175189
----------------------------------------------------------------
176190
-- Unsupported queries
177191
----------------------------------------------------------------
@@ -242,19 +256,11 @@ SELECT NULLIF(discount, 20) FROM test_validation;
242256
SELECT GREATEST(discount, 20) FROM test_validation;
243257
SELECT LEAST(discount, 20) FROM test_validation;
244258

245-
-- Get rejected because of JOINs
246-
SELECT COUNT(*), COUNT(DISTINCT id), COUNT(DISTINCT cid) FROM test_validation
247-
INNER JOIN test_purchases tp ON id = cid;
248-
249-
SELECT COUNT(c.city), COUNT(p.name) FROM test_validation c
250-
LEFT JOIN test_purchases ON c.id = cid
251-
LEFT JOIN test_products p ON pid = p.id;
252-
253-
SELECT city, COUNT(price) FROM test_validation, test_products GROUP BY 1;
254-
255-
SELECT city, COUNT(price) FROM test_products, test_validation GROUP BY 1;
256-
257-
SELECT city, COUNT(price) FROM test_products CROSS JOIN test_validation GROUP BY 1;
259+
-- Get rejected because of invalid JOINs
260+
SELECT COUNT(*) FROM test_validation JOIN test_purchases ON id != cid;
261+
SELECT COUNT(*) FROM test_validation JOIN test_purchases ON id = cid OR cid = id;
262+
SELECT COUNT(*) FROM test_validation JOIN test_purchases ON true;
263+
SELECT COUNT(*) FROM test_validation, test_purchases;
258264

259265
-- Get rejected because of invalid WHERE clauses
260266
SELECT COUNT(*) FROM test_validation WHERE city <> 'London';

0 commit comments

Comments
 (0)