Skip to content

Commit 1cab5d3

Browse files
authored
Merge pull request #447 from diffix/piotr/date_trunc
Support date_trunc
2 parents a2b487c + c5c39ef commit 1cab5d3

File tree

8 files changed

+206
-55
lines changed

8 files changed

+206
-55
lines changed

pg_diffix/query/allowed_objects.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
*/
99
extern bool is_allowed_function(Oid funcoid);
1010

11+
/*
12+
* Returns the zero-based index of the primary argument of an allowed function, i.e. the one intended to
13+
* be the column reference.
14+
*/
15+
extern int primary_arg_index(Oid funcoid);
16+
1117
/*
1218
* Returns whether the OID points to a cast allowed in defining buckets.
1319
*/

src/query/allowed_objects.c

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,49 @@ static const char *const g_allowed_casts[] = {
1414
"ftod", "dtof",
1515
"int4_numeric", "float4_numeric", "float8_numeric",
1616
"numeric_float4", "numeric_float8",
17+
"date_timestamptz",
1718
/**/
1819
};
1920

20-
static const char *const g_allowed_builtins[] = {
21+
typedef struct FunctionByName
22+
{
23+
const char *name;
24+
int primary_arg;
25+
} FunctionByName;
26+
27+
typedef struct FunctionByOid
28+
{
29+
Oid funcid;
30+
int primary_arg;
31+
} FunctionByOid;
32+
33+
static const FunctionByName g_allowed_builtins[] = {
2134
/* rounding casts */
22-
"ftoi2", "ftoi4", "ftoi8", "dtoi2", "dtoi4", "dtoi8", "numeric_int4",
35+
(FunctionByName){.name = "ftoi2", .primary_arg = 0},
36+
(FunctionByName){.name = "ftoi4", .primary_arg = 0},
37+
(FunctionByName){.name = "ftoi8", .primary_arg = 0},
38+
(FunctionByName){.name = "dtoi2", .primary_arg = 0},
39+
(FunctionByName){.name = "dtoi4", .primary_arg = 0},
40+
(FunctionByName){.name = "dtoi8", .primary_arg = 0},
41+
(FunctionByName){.name = "numeric_int4", .primary_arg = 0},
2342
/* substring */
24-
"text_substr", "text_substr_no_len", "bytea_substr", "bytea_substr_no_len",
43+
(FunctionByName){.name = "text_substr", .primary_arg = 0},
44+
(FunctionByName){.name = "text_substr_no_len", .primary_arg = 0},
45+
(FunctionByName){.name = "bytea_substr", .primary_arg = 0},
46+
(FunctionByName){.name = "bytea_substr_no_len", .primary_arg = 0},
2547
/* numeric generalization */
26-
"dround", "numeric_round", "dceil", "numeric_ceil", "dfloor", "numeric_floor",
48+
(FunctionByName){.name = "dround", .primary_arg = 0},
49+
(FunctionByName){.name = "numeric_round", .primary_arg = 0},
50+
(FunctionByName){.name = "dceil", .primary_arg = 0},
51+
(FunctionByName){.name = "numeric_ceil", .primary_arg = 0},
52+
(FunctionByName){.name = "dfloor", .primary_arg = 0},
53+
(FunctionByName){.name = "numeric_floor", .primary_arg = 0},
2754
/* width_bucket */
28-
"width_bucket_float8", "width_bucket_numeric",
55+
(FunctionByName){.name = "width_bucket_float8", .primary_arg = 0},
56+
(FunctionByName){.name = "width_bucket_numeric", .primary_arg = 0},
57+
/* date_trunc */
58+
(FunctionByName){.name = "timestamptz_trunc", .primary_arg = 1},
59+
(FunctionByName){.name = "timestamp_trunc", .primary_arg = 1},
2960
/**/
3061
};
3162

@@ -42,7 +73,7 @@ static const char *const g_implicit_range_builtins_untrusted[] = {
4273

4374
/* Some allowed functions don't appear in the builtins catalog, so we must allow them manually by OID. */
4475
#define F_NUMERIC_ROUND_INT 1708
45-
static const Oid g_allowed_builtins_extra[] = {F_NUMERIC_ROUND_INT};
76+
static const FunctionByOid g_allowed_builtins_extra[] = {(FunctionByOid){.funcid = F_NUMERIC_ROUND_INT, .primary_arg = 0}};
4677

4778
typedef struct AllowedCols
4879
{
@@ -116,6 +147,21 @@ static const FmgrBuiltin *fmgr_isbuiltin(Oid id)
116147
return &fmgr_builtins[index];
117148
}
118149

150+
/*
 * Tells whether `funcoid` resolves to a builtin whose name matches one of the
 * first `length` entries of `func_array`. An OID that `fmgr_isbuiltin` does
 * not resolve is never a member.
 */
static bool is_func_member_of(Oid funcoid, const FunctionByName func_array[], int length)
{
  const FmgrBuiltin *builtin = fmgr_isbuiltin(funcoid);
  if (builtin == NULL)
    return false;

  for (int idx = 0; idx < length; idx++)
  {
    const FunctionByName *entry = &func_array[idx];
    if (strcmp(entry->name, builtin->funcName) == 0)
      return true;
  }

  return false;
}
164+
119165
static bool is_funcname_member_of(Oid funcoid, const char *const name_array[], int length)
120166
{
121167
const FmgrBuiltin *fmgr_builtin = fmgr_isbuiltin(funcoid);
@@ -131,6 +177,34 @@ static bool is_funcname_member_of(Oid funcoid, const char *const name_array[], i
131177
return false;
132178
}
133179

180+
/*
 * Looks up the primary argument index for `funcoid`, trying in order: the
 * implicit-range UDFs, the allowed builtins matched by name, and the extra
 * builtins matched by OID. Fails through FAILWITH when none of them match.
 */
int primary_arg_index(Oid funcoid)
{
  /* Our own UDFs were made to take the primary argument first. */
  for (int udf = 0; udf < ARRAY_LENGTH(g_implicit_range_udfs); udf++)
  {
    if (funcoid == *g_implicit_range_udfs[udf])
      return 0;
  }

  /* The by-name table is only consulted when the OID resolves to a builtin. */
  const FmgrBuiltin *builtin = fmgr_isbuiltin(funcoid);
  for (int idx = 0; builtin != NULL && idx < ARRAY_LENGTH(g_allowed_builtins); idx++)
  {
    const FunctionByName *entry = &g_allowed_builtins[idx];
    if (strcmp(entry->name, builtin->funcName) == 0)
      return entry->primary_arg;
  }

  for (int idx = 0; idx < ARRAY_LENGTH(g_allowed_builtins_extra); idx++)
  {
    const FunctionByOid *entry = &g_allowed_builtins_extra[idx];
    if (entry->funcid == funcoid)
      return entry->primary_arg;
  }

  FAILWITH("Cannot identify the primary argument position for funcid %u.", funcoid);
}
207+
134208
bool is_allowed_cast(Oid funcoid)
135209
{
136210
return is_funcname_member_of(funcoid, g_allowed_casts, ARRAY_LENGTH(g_allowed_casts));
@@ -154,12 +228,12 @@ bool is_allowed_function(Oid funcoid)
154228
return true;
155229
}
156230

157-
if (is_funcname_member_of(funcoid, g_allowed_builtins, ARRAY_LENGTH(g_allowed_builtins)))
231+
if (is_func_member_of(funcoid, g_allowed_builtins, ARRAY_LENGTH(g_allowed_builtins)))
158232
return true;
159233

160234
for (int i = 0; i < ARRAY_LENGTH(g_allowed_builtins_extra); i++)
161235
{
162-
if (g_allowed_builtins_extra[i] == funcoid)
236+
if (g_allowed_builtins_extra[i].funcid == funcoid)
163237
return true;
164238
}
165239

src/query/anonymization.c

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,39 @@ static void append_seed_material(
534534
strcpy(existing_material + existing_material_length, new_material);
535535
}
536536

537+
/*
 * Renders a value as the string that gets appended to seed material or hashed
 * as a label. Callers in this file release the returned string with pfree.
 *
 * - NULLs become the literal text "NULL".
 * - Supported numeric types are converted to double and printed in shortest
 *   decimal form.
 * - Datetime-category types are encoded through `JsonEncodeDateTime` with a
 *   zero timezone offset.
 * - Every other type goes through its type output function.
 */
static char *datum_seed_material(Oid type, Datum value, bool is_null)
{
  if (is_null)
  {
    char *null_text = palloc(sizeof(char) * (4 + 1));
    strcpy(null_text, "NULL");
    return null_text;
  }

  if (is_supported_numeric_type(type))
  {
    /* Normalize numeric values by going through double. */
    double as_double = numeric_value_to_double(type, value);
    char *decimal_text = palloc(sizeof(char) * DOUBLE_SHORTEST_DECIMAL_LEN);
    double_to_shortest_decimal_buf(as_double, decimal_text);
    return decimal_text;
  }

  if (TypeCategory(type) == TYPCATEGORY_DATETIME)
  {
    /* Leveraging `json.h` with UTC to get style-stable encoding of various datetime types. */
    char *datetime_text = palloc(sizeof(char) * (MAXDATELEN + 1));
    const int tzp = 0;
    JsonEncodeDateTime(datetime_text, value, type, &tzp);
    return datetime_text;
  }

  /* Anything else is rendered by the type's own output function. */
  Oid output_funcid = InvalidOid;
  bool is_varlena = false;
  getTypeOutputInfo(type, &output_funcid, &is_varlena);

  return OidOutputFunctionCall(output_funcid, value);
}
569+
537570
typedef struct CollectMaterialContext
538571
{
539572
Query *query;
@@ -585,13 +618,9 @@ static bool collect_seed_material(Node *node, CollectMaterialContext *context)
585618
bool isnull;
586619
get_stable_expression_value(node, context->bound_params, &type, &value, &isnull);
587620

588-
if (!is_supported_numeric_type(type))
589-
FAILWITH_LOCATION(exprLocation(node), "Unsupported constant type used in bucket definition!");
590-
591-
double const_as_double = numeric_value_to_double(type, value);
592-
char const_as_string[DOUBLE_SHORTEST_DECIMAL_LEN];
593-
double_to_shortest_decimal_buf(const_as_double, const_as_string);
594-
append_seed_material(context->material, const_as_string, ',');
621+
char *value_as_string = datum_seed_material(type, value, isnull);
622+
append_seed_material(context->material, value_as_string, ',');
623+
pfree(value_as_string);
595624
}
596625

597626
/* We ignore unknown nodes. Validation should make sure nothing unsafe reaches this stage. */
@@ -617,36 +646,9 @@ static void collect_seed_material_hashes(Query *query, List *exprs, List **seed_
617646

618647
/*
 * Hashes a label value: the value is first rendered to text by
 * `datum_seed_material`, that text is hashed, and the temporary string is
 * freed before returning.
 */
static hash_t hash_label(Oid type, Datum value, bool is_null)
{
  char *material = datum_seed_material(type, value, is_null);
  hash_t label_hash = hash_string(material);
  pfree(material);
  return label_hash;
}
652654

src/query/validation.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,11 +237,14 @@ static void verify_bucket_expression(Node *node)
237237

238238
Assert(list_length(func_expr->args) > 0); /* All allowed functions require at least one argument. */
239239

240-
if (!IsA(unwrap_cast(linitial(func_expr->args)), Var))
240+
int primary_arg = primary_arg_index(func_expr->funcid);
241+
if (!IsA(unwrap_cast(list_nth(func_expr->args, primary_arg)), Var))
241242
FAILWITH_LOCATION(func_expr->location, "Primary argument for a generalization function has to be a simple column reference.");
242243

243-
for (int i = 1; i < list_length(func_expr->args); i++)
244+
for (int i = 0; i < list_length(func_expr->args); i++)
244245
{
246+
if (i == primary_arg)
247+
continue;
245248
Node *arg = unwrap_cast((Node *)list_nth(func_expr->args, i));
246249
if (!is_stable_expression(arg))
247250
FAILWITH_LOCATION(exprLocation(arg), "Non-primary arguments for a generalization function have to be simple constants.");

test/expected/datetime.out

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ INSERT INTO test_datetime VALUES
1616
(6, '2012-05-14', '14:59', '2012-05-14', '2012-05-14', '1 years'),
1717
(7, '2012-05-14', '14:59', '2012-05-14', '2012-05-14', '1 years');
1818
CALL diffix.mark_personal('test_datetime', 'id');
19+
GRANT ALL PRIVILEGES ON TABLE test_datetime TO diffix_test;
1920
SET ROLE diffix_test;
2021
SET pg_diffix.session_access_level = 'anonymized_trusted';
2122
----------------------------------------------------------------
@@ -30,7 +31,7 @@ SELECT diffix.access_level();
3031
----------------------------------------------------------------
3132
-- Seeding
3233
----------------------------------------------------------------
33-
-- Datetime values are seeded the same regardless of global `datestyle` setting
34+
-- Datetime values are seeded in UTC the same regardless of global `datestyle` setting
3435
SET datestyle = 'SQL';
3536
SELECT ts, count(*) FROM test_datetime GROUP BY 1;
3637
ts | count
@@ -45,17 +46,35 @@ SELECT ts, count(*) FROM test_datetime GROUP BY 1;
4546
2012-05-14 00:00:00 | 9
4647
(1 row)
4748

49+
SET TIMEZONE TO 'EST';
50+
SELECT tz, count(*) FROM test_datetime GROUP BY 1;
51+
tz | count
52+
------------------------+-------
53+
2012-05-14 02:00:00-05 | 11
54+
(1 row)
55+
4856
SET TIMEZONE TO 'UTC';
4957
SELECT tz, count(*) FROM test_datetime GROUP BY 1;
5058
tz | count
5159
------------------------+-------
5260
2012-05-14 07:00:00+00 | 11
5361
(1 row)
5462

55-
SET TIMEZONE TO DEFAULT;
63+
SET pg_diffix.session_access_level = 'direct';
64+
UPDATE test_datetime SET tz = '2012-05-14T07:00+00:00' WHERE true;
65+
SET pg_diffix.session_access_level = 'anonymized_trusted';
66+
SELECT tz, count(*) FROM test_datetime GROUP BY 1;
67+
tz | count
68+
------------------------+-------
69+
2012-05-14 07:00:00+00 | 11
70+
(1 row)
71+
72+
SET pg_diffix.session_access_level = 'direct';
73+
UPDATE test_datetime SET tz = '2012-05-14T08:00+01:00' WHERE true;
74+
SET pg_diffix.session_access_level = 'anonymized_trusted';
5675
SELECT tz, count(*) FROM test_datetime GROUP BY 1;
5776
tz | count
5877
------------------------+-------
59-
2012-05-14 00:00:00-07 | 11
78+
2012-05-14 07:00:00+00 | 11
6079
(1 row)
6180

test/expected/validation.out

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ CREATE TABLE test_validation (
66
discount REAL,
77
birthday DATE,
88
lunchtime TIME,
9-
last_seen TIMESTAMP
9+
last_seen TIMESTAMP,
10+
last_seen_tz TIMESTAMP WITH TIME ZONE
1011
);
1112
CALL diffix.mark_personal('test_validation', 'id');
1213
CALL diffix.mark_not_filterable('test_validation', 'birthday');
@@ -116,12 +117,23 @@ GROUP BY 1, 2;
116117

117118
SELECT
118119
substring(cast(last_seen AS text), 1, 3),
120+
substring(cast(last_seen_tz AS text), 1, 3),
119121
substring(cast(birthday AS text), 2, 3),
120122
substring(cast(lunchtime AS varchar), 1, 4)
121123
FROM test_validation
124+
GROUP BY 1, 2, 3, 4;
125+
substring | substring | substring | substring
126+
-----------+-----------+-----------+-----------
127+
(0 rows)
128+
129+
SELECT
130+
date_trunc('year', last_seen),
131+
date_trunc('year', last_seen_tz),
132+
date_trunc('year', birthday)
133+
FROM test_validation
122134
GROUP BY 1, 2, 3;
123-
substring | substring | substring
124-
-----------+-----------+-----------
135+
date_trunc | date_trunc | date_trunc
136+
------------+------------+------------
125137
(0 rows)
126138

127139
-- Allow all functions post-anonymization.
@@ -427,6 +439,16 @@ SELECT COUNT(*) FROM test_validation GROUP BY substr('aaaa', 1, 2);
427439
ERROR: [PG_DIFFIX] Primary argument for a generalization function has to be a simple column reference.
428440
LINE 1: SELECT COUNT(*) FROM test_validation GROUP BY substr('aaaa',...
429441
^
442+
-- Get rejected because of lack of interval support
443+
SELECT date_trunc('year', lunchtime) FROM test_validation GROUP BY 1;
444+
ERROR: [PG_DIFFIX] Unsupported function used for generalization.
445+
LINE 1: SELECT date_trunc('year', lunchtime) FROM test_validation GR...
446+
^
447+
-- Get rejected because of averaging opportunity
448+
SELECT date_trunc('year', last_seen_tz, 'EST') FROM test_validation GROUP BY 1;
449+
ERROR: [PG_DIFFIX] Unsupported function used for generalization.
450+
LINE 1: SELECT date_trunc('year', last_seen_tz, 'EST') FROM test_va...
451+
^
430452
-- Get rejected because expression node type is unsupported.
431453
SELECT COALESCE(discount, 20) FROM test_validation;
432454
ERROR: [PG_DIFFIX] Unsupported generalization expression.

0 commit comments

Comments
 (0)