Skip to content

Commit d0a2bcb

Browse files
committed
ORCA: Validate hash function existence in IsOpHashJoinable
Fix execution-time error when operators have oprcanhash=true but lack actual hash functions. Previously, ORCA would generate HashAgg plans that failed at runtime with "could not find hash function for hash operator" errors. This occurred when operators were marked with the 'hashes' option but only registered in btree operator families, not hash operator families. The fix adds validation using get_op_hash_functions() to ensure hash functions exist before allowing HashAgg plans, moving error detection from execution to planning time.
1 parent fe6d4db commit d0a2bcb

File tree

3 files changed

+322
-1
lines changed

3 files changed

+322
-1
lines changed

src/backend/gpopt/gpdbwrappers.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1613,7 +1613,24 @@ gpdb::IsOpHashJoinable(Oid opno, Oid inputtype)
16131613
GP_WRAP_START;
16141614
{
16151615
/* catalog tables: pg_operator */
1616-
return op_hashjoinable(opno, inputtype);
1616+
if (!op_hashjoinable(opno, inputtype))
1617+
return false;
1618+
1619+
/*
1620+
* Even if oprcanhash is true, we need to verify that hash functions
1621+
* actually exist for this operator. This is because oprcanhash can be
1622+
* set to true while the operator is only registered in a btree opfamily
1623+
* and not in a hash opfamily, which would cause execution-time errors
1624+
* when trying to build hash tables.
1625+
*
1626+
* See get_op_hash_functions() in lsyscache.c which requires operators
1627+
* to be registered in a hash opfamily (amopmethod == HASH_AM_OID).
1628+
*/
1629+
RegProcedure hash_proc;
1630+
if (!get_op_hash_functions(opno, &hash_proc, NULL))
1631+
return false;
1632+
1633+
return true;
16171634
}
16181635
GP_WRAP_END;
16191636
return false;

src/test/regress/expected/gp_hashagg.out

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,169 @@ EXPLAIN (COSTS OFF, VERBOSE) :qry;
274274

275275
(1 row)
276276

277+
drop schema if exists tinc_base_types cascade;
278+
NOTICE: schema "tinc_base_types" does not exist, skipping
279+
create schema tinc_base_types;
280+
set search_path=tinc_base_types;
281+
CREATE FUNCTION int42_in(cstring)
282+
RETURNS int42
283+
AS 'int4in'
284+
LANGUAGE internal IMMUTABLE STRICT;
285+
NOTICE: type "int42" is not yet defined
286+
DETAIL: Creating a shell type definition.
287+
CREATE FUNCTION int42_out(int42)
288+
RETURNS cstring
289+
AS 'int4out'
290+
LANGUAGE internal IMMUTABLE STRICT;
291+
NOTICE: argument type int42 is only a shell
292+
CREATE TYPE int42 (
293+
internallength = 4,
294+
input = int42_in,
295+
output = int42_out,
296+
alignment = int4,
297+
default = 42,
298+
passedbyvalue
299+
);
300+
CREATE TABLE aTABLE(k int, a int42);
301+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'k' as the Apache Cloudberry data distribution key for this table.
302+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
303+
INSERT INTO aTABLE VALUES(1, '21');
304+
INSERT INTO aTABLE VALUES(2, '22');
305+
INSERT INTO aTABLE VALUES(3, '23');
306+
INSERT INTO aTABLE VALUES(4, '24');
307+
SELECT * FROM aTABLE;
308+
k | a
309+
---+----
310+
2 | 22
311+
3 | 23
312+
4 | 24
313+
1 | 21
314+
(4 rows)
315+
316+
CREATE OR REPLACE FUNCTION my_lt(int42, int42)
317+
RETURNS boolean AS
318+
'int4lt'
319+
language internal;
320+
CREATE OR REPLACE FUNCTION my_lteq(int42, int42)
321+
RETURNS boolean AS
322+
'int4le'
323+
language internal;
324+
CREATE OR REPLACE FUNCTION my_gt(int42, int42)
325+
RETURNS boolean AS
326+
'int4gt'
327+
language internal;
328+
CREATE OR REPLACE FUNCTION my_gteq(int42, int42)
329+
RETURNS boolean AS
330+
'int4ge'
331+
language internal;
332+
CREATE OR REPLACE FUNCTION my_eq(int42, int42)
333+
RETURNS boolean AS
334+
'int4eq'
335+
language internal;
336+
CREATE OPERATOR < (
337+
leftarg = int42,
338+
rightarg = int42,
339+
procedure = my_lt,
340+
hashes
341+
);
342+
CREATE OPERATOR <= (
343+
leftarg = int42,
344+
rightarg = int42,
345+
procedure = my_lteq,
346+
hashes
347+
);
348+
CREATE OPERATOR = (
349+
leftarg = int42,
350+
rightarg = int42,
351+
procedure = my_eq,
352+
hashes
353+
);
354+
CREATE OPERATOR > (
355+
leftarg = int42,
356+
rightarg = int42,
357+
procedure = my_gt,
358+
hashes
359+
);
360+
CREATE OPERATOR >= (
361+
leftarg = int42,
362+
rightarg = int42,
363+
procedure = my_gteq,
364+
hashes
365+
);
366+
CREATE OR REPLACE FUNCTION my_comp_func(int42, int42)
367+
RETURNS int AS
368+
$$
369+
BEGIN
370+
IF $1 < $2 THEN
371+
return -1;
372+
ELSIF $1 = $2 THEN
373+
return 0;
374+
ELSE
375+
return 1;
376+
END IF;
377+
END;
378+
$$ LANGUAGE PLPGSQL;
379+
CREATE OPERATOR CLASS my_operator_class_comp_type
380+
DEFAULT
381+
FOR TYPE int42 USING btree
382+
AS
383+
OPERATOR 1 <,
384+
OPERATOR 2 <=,
385+
OPERATOR 3 =,
386+
OPERATOR 4 >=,
387+
OPERATOR 5 >,
388+
FUNCTION 1 my_comp_func(int42, int42);
389+
CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt
390+
DEFAULT
391+
FOR TYPE int42 USING hash
392+
AS
393+
OPERATOR 1 <,
394+
FUNCTION 1 my_comp_func(int42, int42);
395+
ERROR: hash function 1 must have one argument
396+
CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq
397+
FOR TYPE int42 USING hash
398+
AS
399+
OPERATOR 1 =,
400+
FUNCTION 1 my_comp_func(int42, int42);
401+
ERROR: hash function 1 must have one argument
402+
select * from atable group by a,k;
403+
k | a
404+
---+----
405+
1 | 21
406+
2 | 22
407+
3 | 23
408+
4 | 24
409+
(4 rows)
410+
411+
-- Before fix: This would fail at runtime with:
412+
-- ERROR: could not find hash function for hash operator XXXXX (execGrouping.c:118)
413+
-- After fix: ORCA should detect missing hash function at planning time
414+
-- and either use GroupAgg or report a clear planning error
415+
SET optimizer_enable_groupagg=off;
416+
select * from atable group by a,k;
417+
k | a
418+
---+----
419+
1 | 21
420+
2 | 22
421+
3 | 23
422+
4 | 24
423+
(4 rows)
424+
425+
drop schema if exists tinc_base_types cascade;
426+
NOTICE: drop cascades to 16 other objects
427+
DETAIL: drop cascades to function int42_out(int42)
428+
drop cascades to type int42
429+
drop cascades to function int42_in(cstring)
430+
drop cascades to table atable
431+
drop cascades to function my_lt(int42,int42)
432+
drop cascades to function my_lteq(int42,int42)
433+
drop cascades to function my_gt(int42,int42)
434+
drop cascades to function my_gteq(int42,int42)
435+
drop cascades to function my_eq(int42,int42)
436+
drop cascades to operator <(int42,int42)
437+
drop cascades to operator <=(int42,int42)
438+
drop cascades to operator =(int42,int42)
439+
drop cascades to operator >(int42,int42)
440+
drop cascades to operator >=(int42,int42)
441+
drop cascades to function my_comp_func(int42,int42)
442+
drop cascades to operator family my_operator_class_comp_type for access method btree

src/test/regress/sql/gp_hashagg.sql

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,141 @@ HAVING max(c) = '31'
140140
$$ AS qry \gset
141141
EXPLAIN (COSTS OFF, VERBOSE) :qry;
142142
:qry;
143+
144+
drop schema if exists tinc_base_types cascade;
145+
146+
create schema tinc_base_types;
147+
set search_path=tinc_base_types;
148+
149+
150+
CREATE FUNCTION int42_in(cstring)
151+
RETURNS int42
152+
AS 'int4in'
153+
LANGUAGE internal IMMUTABLE STRICT;
154+
CREATE FUNCTION int42_out(int42)
155+
RETURNS cstring
156+
AS 'int4out'
157+
LANGUAGE internal IMMUTABLE STRICT;
158+
CREATE TYPE int42 (
159+
internallength = 4,
160+
input = int42_in,
161+
output = int42_out,
162+
alignment = int4,
163+
default = 42,
164+
passedbyvalue
165+
);
166+
167+
CREATE TABLE aTABLE(k int, a int42);
168+
INSERT INTO aTABLE VALUES(1, '21');
169+
INSERT INTO aTABLE VALUES(2, '22');
170+
INSERT INTO aTABLE VALUES(3, '23');
171+
INSERT INTO aTABLE VALUES(4, '24');
172+
SELECT * FROM aTABLE;
173+
174+
CREATE OR REPLACE FUNCTION my_lt(int42, int42)
175+
RETURNS boolean AS
176+
'int4lt'
177+
language internal;
178+
179+
CREATE OR REPLACE FUNCTION my_lteq(int42, int42)
180+
RETURNS boolean AS
181+
'int4le'
182+
language internal;
183+
184+
CREATE OR REPLACE FUNCTION my_gt(int42, int42)
185+
RETURNS boolean AS
186+
'int4gt'
187+
language internal;
188+
189+
CREATE OR REPLACE FUNCTION my_gteq(int42, int42)
190+
RETURNS boolean AS
191+
'int4ge'
192+
language internal;
193+
194+
CREATE OR REPLACE FUNCTION my_eq(int42, int42)
195+
RETURNS boolean AS
196+
'int4eq'
197+
language internal;
198+
199+
CREATE OPERATOR < (
200+
leftarg = int42,
201+
rightarg = int42,
202+
procedure = my_lt,
203+
hashes
204+
);
205+
206+
CREATE OPERATOR <= (
207+
leftarg = int42,
208+
rightarg = int42,
209+
procedure = my_lteq,
210+
hashes
211+
);
212+
213+
CREATE OPERATOR = (
214+
leftarg = int42,
215+
rightarg = int42,
216+
procedure = my_eq,
217+
hashes
218+
);
219+
220+
CREATE OPERATOR > (
221+
leftarg = int42,
222+
rightarg = int42,
223+
procedure = my_gt,
224+
hashes
225+
);
226+
227+
CREATE OPERATOR >= (
228+
leftarg = int42,
229+
rightarg = int42,
230+
procedure = my_gteq,
231+
hashes
232+
);
233+
234+
CREATE OR REPLACE FUNCTION my_comp_func(int42, int42)
235+
RETURNS int AS
236+
$$
237+
BEGIN
238+
IF $1 < $2 THEN
239+
return -1;
240+
ELSIF $1 = $2 THEN
241+
return 0;
242+
ELSE
243+
return 1;
244+
END IF;
245+
END;
246+
$$ LANGUAGE PLPGSQL;
247+
248+
CREATE OPERATOR CLASS my_operator_class_comp_type
249+
DEFAULT
250+
FOR TYPE int42 USING btree
251+
AS
252+
OPERATOR 1 <,
253+
OPERATOR 2 <=,
254+
OPERATOR 3 =,
255+
OPERATOR 4 >=,
256+
OPERATOR 5 >,
257+
FUNCTION 1 my_comp_func(int42, int42);
258+
259+
CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt
260+
DEFAULT
261+
FOR TYPE int42 USING hash
262+
AS
263+
OPERATOR 1 <,
264+
FUNCTION 1 my_comp_func(int42, int42);
265+
266+
CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq
267+
FOR TYPE int42 USING hash
268+
AS
269+
OPERATOR 1 =,
270+
FUNCTION 1 my_comp_func(int42, int42);
271+
272+
select * from atable group by a,k;
273+
-- Before fix: This would fail at runtime with:
274+
-- ERROR: could not find hash function for hash operator XXXXX (execGrouping.c:118)
275+
-- After fix: ORCA should detect missing hash function at planning time
276+
-- and either use GroupAgg or report a clear planning error
277+
SET optimizer_enable_groupagg=off;
278+
select * from atable group by a,k;
279+
280+
drop schema if exists tinc_base_types cascade;

0 commit comments

Comments
 (0)