Skip to content

Commit b5daeb1

Browse files
authored
Merge pull request #437 from diffix/piotr/fix-avg-divide-by-zero
Fix avg/avg_noise when anonymized count is 0
2 parents 25ef336 + 42d0186 commit b5daeb1

File tree

3 files changed

+29
-11
lines changed

3 files changed

+29
-11
lines changed

pg_diffix/oid_cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ typedef struct Oids
5656

5757
/* Internal functions */
5858
Oid internal_qual_wrapper; /* diffix.internal_qual_wrapper(boolean) */
59+
60+
Oid op_int8eq;
5961
} Oids;
6062

6163
/*

src/oid_cache.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "catalog/pg_type.h"
44
#include "lib/stringinfo.h"
55
#include "parser/parse_func.h"
6+
#include "parser/parse_oper.h"
67
#include "utils/lsyscache.h"
78

89
#include "pg_diffix/oid_cache.h"
@@ -66,6 +67,10 @@ void oid_cache_init(void)
6667

6768
g_oid_cache.internal_qual_wrapper = lookup_function("diffix", "internal_qual_wrapper", 1, (Oid[]){BOOLOID});
6869

70+
List *eq_op_name = list_make1(makeString("="));
71+
g_oid_cache.op_int8eq = LookupOperName(NULL, eq_op_name, INT8OID, INT8OID, false, -1);
72+
list_free_deep(eq_op_name);
73+
6974
g_loaded = true;
7075
}
7176

src/query/anonymization.c

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -188,15 +188,30 @@ static void rewrite_count_histogram(Aggref *aggref, List *aid_refs)
188188
append_aid_args(aggref, aid_refs);
189189
}
190190

191-
/* Intended for the denominator in the `avg(col)` rewritten to `sum(col) / count(col)`. */
192-
static Aggref *make_anon_count_value_aggref(const Aggref *source_aggref)
191+
/*
192+
* Intended for the denominator in the `avg(col)` rewritten to `sum(col) / nullif(count(col), 0)`.
193+
* `nullif` is necessary to handle cases where large negative noise brings `count` down to 0
194+
* during global aggregation.
195+
*/
196+
static Expr *make_safe_anon_count_value(const Aggref *source_aggref)
193197
{
194198
Aggref *count_aggref = copyObjectImpl(source_aggref);
195199
count_aggref->aggfnoid = g_oid_cache.anon_count_value;
196200
count_aggref->aggtype = INT8OID;
197201
count_aggref->aggstar = false;
198202
count_aggref->aggdistinct = false;
199-
return count_aggref;
203+
204+
NullIfExpr *nullif = makeNode(NullIfExpr);
205+
nullif->opno = g_oid_cache.op_int8eq;
206+
nullif->opfuncid = F_INT8EQ;
207+
nullif->opresulttype = count_aggref->aggtype;
208+
nullif->opretset = false;
209+
nullif->opcollid = 0;
210+
nullif->inputcollid = 0;
211+
nullif->args = list_make2(count_aggref, make_const_int64(0L));
212+
nullif->location = count_aggref->location;
213+
214+
return (Expr *)nullif;
200215
}
201216

202217
static FuncExpr *make_i4tod(List *args)
@@ -254,11 +269,9 @@ static Node *rewrite_to_avg_aggregator(Aggref *aggref, List *aid_refs)
254269
aggref->aggfnoid = g_oid_cache.anon_sum;
255270
aggref->aggstar = false;
256271
aggref->aggdistinct = false;
257-
258-
Aggref *count_aggref = make_anon_count_value_aggref(aggref);
259-
260272
append_aid_args(aggref, aid_refs);
261-
append_aid_args(count_aggref, aid_refs);
273+
274+
Expr *count_aggref = make_safe_anon_count_value(aggref);
262275

263276
FuncExpr *cast_sum;
264277
FuncExpr *cast_count;
@@ -310,11 +323,9 @@ static Node *rewrite_to_avg_noise_aggregator(Aggref *aggref, List *aid_refs)
310323
aggref->aggtype = FLOAT8OID;
311324
aggref->aggstar = false;
312325
aggref->aggdistinct = false;
313-
314-
Aggref *count_aggref = make_anon_count_value_aggref(aggref);
315-
316326
append_aid_args(aggref, aid_refs);
317-
append_aid_args(count_aggref, aid_refs);
327+
328+
Expr *count_aggref = make_safe_anon_count_value(aggref);
318329

319330
FuncExpr *cast_count = make_i4tod(list_make1(count_aggref));
320331
FuncExpr *division = make_float8div(list_make2(aggref, cast_count));

0 commit comments

Comments
 (0)