Skip to content

Commit 6711291

Browse files
authored
Merge pull request #450 from diffix/piotr/extract-datetime
Support `extract(... from ...)`
2 parents 81c2461 + 8e82bad commit 6711291

File tree

7 files changed

+84
-8
lines changed

7 files changed

+84
-8
lines changed

src/query/allowed_objects.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "postgres.h"
22

33
#include "access/sysattr.h"
4+
#include "utils/fmgroids.h"
45
#include "utils/fmgrtab.h"
56
#include "utils/lsyscache.h"
67
#include "utils/memutils.h"
@@ -57,6 +58,12 @@ static const FunctionByName g_allowed_builtins[] = {
5758
/* date_trunc */
5859
(FunctionByName){.name = "timestamptz_trunc", .primary_arg = 1},
5960
(FunctionByName){.name = "timestamp_trunc", .primary_arg = 1},
61+
/* extract & date_part */
62+
(FunctionByName){.name = "extract_date", .primary_arg = 1},
63+
(FunctionByName){.name = "extract_timestamp", .primary_arg = 1},
64+
(FunctionByName){.name = "extract_timestamptz", .primary_arg = 1},
65+
(FunctionByName){.name = "timestamp_part", .primary_arg = 1},
66+
(FunctionByName){.name = "timestamptz_part", .primary_arg = 1},
6067
/**/
6168
};
6269

@@ -73,7 +80,19 @@ static const char *const g_implicit_range_builtins_untrusted[] = {
7380

7481
/* Some allowed functions don't appear in the builtins catalog, so we must allow them manually by OID. */
7582
#define F_NUMERIC_ROUND_INT 1708
76-
static const FunctionByOid g_allowed_builtins_extra[] = {(FunctionByOid){.funcid = F_NUMERIC_ROUND_INT, .primary_arg = 0}};
83+
/*
84+
* `date_part` for `date` is a SQL builtin and doesn't show up in `fmgr_isbuiltin`.
85+
* PG 14 has the define, but PG 13 doesn't.
86+
*/
87+
#if PG_MAJORVERSION_NUM < 14
88+
#define F_DATE_PART_TEXT_DATE 1384
89+
#endif
90+
91+
static const FunctionByOid g_allowed_builtins_extra[] = {
92+
(FunctionByOid){.funcid = F_NUMERIC_ROUND_INT, .primary_arg = 0},
93+
(FunctionByOid){.funcid = F_DATE_PART_TEXT_DATE, .primary_arg = 1},
94+
/**/
95+
};
7796

7897
typedef struct AllowedCols
7998
{

src/query/anonymization.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,15 @@ typedef struct CollectMaterialContext
574574
char material[MAX_SEED_MATERIAL_SIZE];
575575
} CollectMaterialContext;
576576

577+
static void normalize_function_name(char *func_name)
578+
{
579+
if (strcmp(func_name, "date_part") == 0)
580+
{
581+
// No need to reallocate `func_name`: the normalized name is shorter, so it fits in the existing buffer.
582+
strcpy(func_name, "extract");
583+
}
584+
}
585+
577586
static bool collect_seed_material(Node *node, CollectMaterialContext *context)
578587
{
579588
if (node == NULL)
@@ -587,7 +596,7 @@ static bool collect_seed_material(Node *node, CollectMaterialContext *context)
587596
char *func_name = get_func_name(func_expr->funcid);
588597
if (func_name)
589598
{
590-
/* TODO: Normalize function names. */
599+
normalize_function_name(func_name);
591600
append_seed_material(context->material, func_name, ',');
592601
pfree(func_name);
593602
}

src/query/validation.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ static bool verify_aggregator(Node *node, void *context)
188188
aggoid != g_oid_cache.is_suppress_bin)
189189
FAILWITH_LOCATION(aggref->location, "Unsupported aggregate in query.");
190190

191+
if ((aggoid == g_oid_cache.sum_noise ||
192+
aggoid == g_oid_cache.avg_noise) &&
193+
TypeCategory(linitial_oid(aggref->aggargtypes)) == TYPCATEGORY_DATETIME)
194+
FAILWITH_LOCATION(aggref->location, "Unsupported aggregate in query.");
195+
191196
if (aggoid == g_oid_cache.count_value || aggoid == g_oid_cache.count_value_noise ||
192197
is_sum_oid(aggoid) || aggoid == g_oid_cache.sum_noise ||
193198
is_avg_oid(aggoid) || aggoid == g_oid_cache.avg_noise)

test/expected/datetime.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,15 @@ SELECT count(*) FROM test_datetime WHERE date_trunc('year', ts) = '2012-01-01'::
8585
11
8686
(1 row)
8787

88+
SELECT count(*) FROM test_datetime WHERE extract(century from ts) = 21;
89+
count
90+
-------
91+
9
92+
(1 row)
93+
94+
SELECT count(*) FROM test_datetime WHERE date_part('century', ts) = 21;
95+
count
96+
-------
97+
9
98+
(1 row)
99+

test/expected/validation.out

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,21 @@ GROUP BY 1, 2, 3, 4;
126126
-----------+-----------+-----------+-----------
127127
(0 rows)
128128

129+
-- `as extract` ensures that the column is aliased consistently in PG 13 and 14.
129130
SELECT
130131
date_trunc('year', last_seen),
131132
date_trunc('year', last_seen_tz),
132-
date_trunc('year', birthday)
133+
date_trunc('year', birthday),
134+
extract(month from last_seen) as extract,
135+
extract(month from last_seen_tz) as extract,
136+
extract(month from birthday) as extract,
137+
date_part('month', last_seen) as date_part,
138+
date_part('month', last_seen_tz) as date_part,
139+
date_part('month', birthday) as date_part
133140
FROM test_validation
134-
GROUP BY 1, 2, 3;
135-
date_trunc | date_trunc | date_trunc
136-
------------+------------+------------
141+
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9;
142+
date_trunc | date_trunc | date_trunc | extract | extract | extract | date_part | date_part | date_part
143+
------------+------------+------------+---------+---------+---------+-----------+-----------+-----------
137144
(0 rows)
138145

139146
-- Allow all functions post-anonymization.
@@ -403,6 +410,14 @@ SELECT diffix.count_histogram(city) FROM test_validation;
403410
ERROR: [PG_DIFFIX] count_histogram argument must be an AID column.
404411
LINE 1: SELECT diffix.count_histogram(city) FROM test_validation;
405412
^
413+
SELECT diffix.sum_noise(last_seen) FROM test_validation;
414+
ERROR: [PG_DIFFIX] Unsupported aggregate in query.
415+
LINE 1: SELECT diffix.sum_noise(last_seen) FROM test_validation;
416+
^
417+
SELECT diffix.avg_noise(last_seen::date) FROM test_validation;
418+
ERROR: [PG_DIFFIX] Unsupported aggregate in query.
419+
LINE 1: SELECT diffix.avg_noise(last_seen::date) FROM test_validatio...
420+
^
406421
-- Get rejected because only a subset of expressions is supported for defining buckets.
407422
SELECT COUNT(*) FROM test_validation GROUP BY LENGTH(city);
408423
ERROR: [PG_DIFFIX] Unsupported function used for generalization.
@@ -444,6 +459,10 @@ SELECT date_trunc('year', lunchtime) FROM test_validation GROUP BY 1;
444459
ERROR: [PG_DIFFIX] Unsupported function used for generalization.
445460
LINE 1: SELECT date_trunc('year', lunchtime) FROM test_validation GR...
446461
^
462+
SELECT extract(hour from lunchtime) FROM test_validation GROUP BY 1;
463+
ERROR: [PG_DIFFIX] Unsupported function used for generalization.
464+
LINE 1: SELECT extract(hour from lunchtime) FROM test_validation GRO...
465+
^
447466
-- Get rejected because of averaging opportunity
448467
SELECT date_trunc('year', last_seen_tz, 'EST') FROM test_validation GROUP BY 1;
449468
ERROR: [PG_DIFFIX] Unsupported function used for generalization.

test/sql/datetime.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,5 @@ SELECT tz, count(*) FROM test_datetime GROUP BY 1;
5656

5757
-- Datetime filtering
5858
SELECT count(*) FROM test_datetime WHERE date_trunc('year', ts) = '2012-01-01'::timestamp;
59+
SELECT count(*) FROM test_datetime WHERE extract(century from ts) = 21;
60+
SELECT count(*) FROM test_datetime WHERE date_part('century', ts) = 21;

test/sql/validation.sql

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,19 @@ SELECT
9393
FROM test_validation
9494
GROUP BY 1, 2, 3, 4;
9595

96+
-- `as extract` ensures that the column is aliased consistently in PG 13 and 14.
9697
SELECT
9798
date_trunc('year', last_seen),
9899
date_trunc('year', last_seen_tz),
99-
date_trunc('year', birthday)
100+
date_trunc('year', birthday),
101+
extract(month from last_seen) as extract,
102+
extract(month from last_seen_tz) as extract,
103+
extract(month from birthday) as extract,
104+
date_part('month', last_seen) as date_part,
105+
date_part('month', last_seen_tz) as date_part,
106+
date_part('month', birthday) as date_part
100107
FROM test_validation
101-
GROUP BY 1, 2, 3;
108+
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9;
102109

103110
-- Allow all functions post-anonymization.
104111
SELECT 2 * length(city) FROM test_validation GROUP BY city;
@@ -207,6 +214,8 @@ SELECT count(distinct least(id, 5)) FROM test_validation;
207214
SELECT count(id + 5) FROM test_validation;
208215
SELECT count(least(id, 5)) FROM test_validation;
209216
SELECT diffix.count_histogram(city) FROM test_validation;
217+
SELECT diffix.sum_noise(last_seen) FROM test_validation;
218+
SELECT diffix.avg_noise(last_seen::date) FROM test_validation;
210219

211220
-- Get rejected because only a subset of expressions is supported for defining buckets.
212221
SELECT COUNT(*) FROM test_validation GROUP BY LENGTH(city);
@@ -222,6 +231,7 @@ SELECT COUNT(*) FROM test_validation GROUP BY substr('aaaa', 1, 2);
222231

223232
-- Get rejected because of lack of interval support
224233
SELECT date_trunc('year', lunchtime) FROM test_validation GROUP BY 1;
234+
SELECT extract(hour from lunchtime) FROM test_validation GROUP BY 1;
225235

226236
-- Get rejected because of averaging opportunity
227237
SELECT date_trunc('year', last_seen_tz, 'EST') FROM test_validation GROUP BY 1;

0 commit comments

Comments
 (0)