Skip to content

Commit e384a96

Browse files
authored
Optimize id() and properties() field access (#2284)
NOTE: This PR was created with AI tools and a human. Optimized id() and properties() field access on vertices and edges. When accessing id(v) or properties(v) on a vertex, the system was generating inefficient SQL that rebuilt the entire vertex agtype before extracting the field: age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties)) This forced full vertex reconstruction for every row, even though the data was already available in table columns. Added optimize_vertex_field_access() in cypher_expr.c to detect these patterns and optimize them to direct column access: - age_id(_agtype_build_vertex(id, ...)) → graphid_to_agtype(id) - age_properties(_agtype_build_vertex(..., props)) → props - age_id(_agtype_build_edge(id, ...)) → graphid_to_agtype(id) - age_start_id(_agtype_build_edge(...)) → graphid_to_agtype(start_id) - age_end_id(_agtype_build_edge(...)) → graphid_to_agtype(end_id) - age_properties(_agtype_build_edge(...)) → props Note: age_label() is intentionally not optimized due to complexity of cstring-to-agtype string conversion. Added regression tests in unified_vertex_table.sql to verify the optimization works correctly for both vertices and edges. modified: regress/expected/unified_vertex_table.out modified: regress/sql/unified_vertex_table.sql modified: src/backend/parser/cypher_expr.c
1 parent 041096f commit e384a96

File tree

3 files changed

+348
-1
lines changed

3 files changed

+348
-1
lines changed

regress/expected/unified_vertex_table.out

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1219,11 +1219,101 @@ SELECT * FROM cypher('unified_test', $$
12191219
RETURN e
12201220
$$) AS (e agtype);
12211221
ERROR: SET/REMOVE label can only be used on vertices
1222+
--
1223+
-- Test 28: Verify id() and properties() optimization
1224+
--
1225+
-- The optimization avoids rebuilding the full vertex agtype when accessing
1226+
-- id() or properties() on a vertex. Instead of:
1227+
-- age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties))
1228+
-- It generates:
1229+
-- graphid_to_agtype(id)
1230+
--
1231+
-- And for properties:
1232+
-- age_properties(_agtype_build_vertex(...))
1233+
-- It generates:
1234+
-- properties (direct column access)
1235+
--
1236+
-- Create test data
1237+
SELECT * FROM cypher('unified_test', $$
1238+
CREATE (:OptimizeTest {val: 1}),
1239+
(:OptimizeTest {val: 2}),
1240+
(:OptimizeTest {val: 3})
1241+
$$) AS (v agtype);
1242+
v
1243+
---
1244+
(0 rows)
1245+
1246+
-- Test that id() works correctly with optimization
1247+
SELECT * FROM cypher('unified_test', $$
1248+
MATCH (n:OptimizeTest)
1249+
RETURN id(n), n.val
1250+
ORDER BY n.val
1251+
$$) AS (id agtype, val agtype);
1252+
id | val
1253+
-------------------+-----
1254+
10977524091715585 | 1
1255+
10977524091715586 | 2
1256+
10977524091715587 | 3
1257+
(3 rows)
1258+
1259+
-- Test that properties() works correctly with optimization
1260+
SELECT * FROM cypher('unified_test', $$
1261+
MATCH (n:OptimizeTest)
1262+
RETURN properties(n), n.val
1263+
ORDER BY n.val
1264+
$$) AS (props agtype, val agtype);
1265+
props | val
1266+
------------+-----
1267+
{"val": 1} | 1
1268+
{"val": 2} | 2
1269+
{"val": 3} | 3
1270+
(3 rows)
1271+
1272+
-- Test id() in WHERE clause (common optimization target)
1273+
SELECT * FROM cypher('unified_test', $$
1274+
MATCH (n:OptimizeTest)
1275+
WHERE id(n) % 10 = 0
1276+
RETURN n.val
1277+
$$) AS (val agtype);
1278+
val
1279+
-----
1280+
(0 rows)
1281+
1282+
-- Test properties() access in expressions
1283+
SELECT * FROM cypher('unified_test', $$
1284+
MATCH (n:OptimizeTest)
1285+
WHERE properties(n).val > 1
1286+
RETURN n.val
1287+
ORDER BY n.val
1288+
$$) AS (val agtype);
1289+
val
1290+
-----
1291+
2
1292+
3
1293+
(2 rows)
1294+
1295+
-- Test edge id/properties optimization
1296+
SELECT * FROM cypher('unified_test', $$
1297+
CREATE (:OptStart {x: 1})-[:OPT_EDGE {weight: 10}]->(:OptEnd {y: 2})
1298+
$$) AS (v agtype);
1299+
v
1300+
---
1301+
(0 rows)
1302+
1303+
SELECT * FROM cypher('unified_test', $$
1304+
MATCH (a)-[e:OPT_EDGE]->(b)
1305+
RETURN id(e), properties(e), start_id(e), end_id(e)
1306+
$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
1307+
eid | props | sid | eid2
1308+
-------------------+----------------+-------------------+-------------------
1309+
11540474045136897 | {"weight": 10} | 11258999068426241 | 11821949021847553
1310+
(1 row)
1311+
12221312
--
12231313
-- Cleanup
12241314
--
12251315
SELECT drop_graph('unified_test', true);
1226-
NOTICE: drop cascades to 38 other objects
1316+
NOTICE: drop cascades to 42 other objects
12271317
DETAIL: drop cascades to table unified_test._ag_label_vertex
12281318
drop cascades to table unified_test._ag_label_edge
12291319
drop cascades to table unified_test."Person"
@@ -1262,6 +1352,10 @@ drop cascades to table unified_test."SameLabel"
12621352
drop cascades to table unified_test."EdgeTest1"
12631353
drop cascades to table unified_test."CONNECTS"
12641354
drop cascades to table unified_test."EdgeTest2"
1355+
drop cascades to table unified_test."OptimizeTest"
1356+
drop cascades to table unified_test."OptStart"
1357+
drop cascades to table unified_test."OPT_EDGE"
1358+
drop cascades to table unified_test."OptEnd"
12651359
NOTICE: graph "unified_test" has been dropped
12661360
drop_graph
12671361
------------

regress/sql/unified_vertex_table.sql

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,67 @@ SELECT * FROM cypher('unified_test', $$
748748
RETURN e
749749
$$) AS (e agtype);
750750

751+
--
752+
-- Test 28: Verify id() and properties() optimization
753+
--
754+
-- The optimization avoids rebuilding the full vertex agtype when accessing
755+
-- id() or properties() on a vertex. Instead of:
756+
-- age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties))
757+
-- It generates:
758+
-- graphid_to_agtype(id)
759+
--
760+
-- And for properties:
761+
-- age_properties(_agtype_build_vertex(...))
762+
-- It generates:
763+
-- properties (direct column access)
764+
--
765+
766+
-- Create test data
767+
SELECT * FROM cypher('unified_test', $$
768+
CREATE (:OptimizeTest {val: 1}),
769+
(:OptimizeTest {val: 2}),
770+
(:OptimizeTest {val: 3})
771+
$$) AS (v agtype);
772+
773+
-- Test that id() works correctly with optimization
774+
SELECT * FROM cypher('unified_test', $$
775+
MATCH (n:OptimizeTest)
776+
RETURN id(n), n.val
777+
ORDER BY n.val
778+
$$) AS (id agtype, val agtype);
779+
780+
-- Test that properties() works correctly with optimization
781+
SELECT * FROM cypher('unified_test', $$
782+
MATCH (n:OptimizeTest)
783+
RETURN properties(n), n.val
784+
ORDER BY n.val
785+
$$) AS (props agtype, val agtype);
786+
787+
-- Test id() in WHERE clause (common optimization target)
788+
SELECT * FROM cypher('unified_test', $$
789+
MATCH (n:OptimizeTest)
790+
WHERE id(n) % 10 = 0
791+
RETURN n.val
792+
$$) AS (val agtype);
793+
794+
-- Test properties() access in expressions
795+
SELECT * FROM cypher('unified_test', $$
796+
MATCH (n:OptimizeTest)
797+
WHERE properties(n).val > 1
798+
RETURN n.val
799+
ORDER BY n.val
800+
$$) AS (val agtype);
801+
802+
-- Test edge id/properties optimization
803+
SELECT * FROM cypher('unified_test', $$
804+
CREATE (:OptStart {x: 1})-[:OPT_EDGE {weight: 10}]->(:OptEnd {y: 2})
805+
$$) AS (v agtype);
806+
807+
SELECT * FROM cypher('unified_test', $$
808+
MATCH (a)-[e:OPT_EDGE]->(b)
809+
RETURN id(e), properties(e), start_id(e), end_id(e)
810+
$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
811+
751812
--
752813
-- Cleanup
753814
--

src/backend/parser/cypher_expr.c

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ static bool function_exists(char *funcname, char *extension);
116116
static Node *coerce_expr_flexible(ParseState *pstate, Node *expr,
117117
Oid source_oid, Oid target_oid,
118118
int32 t_typemod, bool error_out);
119+
static Node *optimize_vertex_field_access(Node *node);
119120

120121
/* transform a cypher expression */
121122
Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr,
@@ -2082,6 +2083,14 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn)
20822083
retval = ParseFuncOrColumn(pstate, fname, targs, last_srf, fn, false,
20832084
fn->location);
20842085

2086+
/*
2087+
* Optimize vertex and edge field access patterns. This detects cases like:
2088+
* age_id(_agtype_build_vertex(id, label, props))
2089+
* and rewrites them to use the underlying column directly, avoiding the
2090+
* expensive reconstruction of the vertex/edge agtype just to extract a field.
2091+
*/
2092+
retval = optimize_vertex_field_access(retval);
2093+
20852094
/* flag that an aggregate was found during a transform */
20862095
if (retval != NULL && retval->type == T_Aggref)
20872096
{
@@ -2407,3 +2416,186 @@ static Node *transform_SubLink(cypher_parsestate *cpstate, SubLink *sublink)
24072416

24082417
return result;
24092418
}
2419+
/*
2420+
* optimize_vertex_field_access
2421+
*
2422+
* This function optimizes patterns where we're extracting fields from
2423+
* a vertex that was just built from its underlying columns. The most
2424+
* common case is:
2425+
*
2426+
* age_id(_agtype_build_vertex(id, label_name, properties))
2427+
*
2428+
* Which can be optimized to just use 'id' directly (cast to agtype).
2429+
*
2430+
* Similar optimizations apply to:
2431+
* - age_properties(_agtype_build_vertex(...)) -> properties
2432+
* - age_label(_agtype_build_vertex(...)) -> label_name (needs cast)
2433+
*
2434+
* The same optimizations apply to edges with _agtype_build_edge:
2435+
* - age_id(_agtype_build_edge(id, startid, endid, label, props)) -> id
2436+
* - age_start_id(_agtype_build_edge(...)) -> startid
2437+
* - age_end_id(_agtype_build_edge(...)) -> endid
2438+
* - age_properties(_agtype_build_edge(...)) -> props
2439+
* - age_label(_agtype_build_edge(...)) -> label
2440+
*/
2441+
static Node *optimize_vertex_field_access(Node *node)
2442+
{
2443+
FuncExpr *outer_func;
2444+
FuncExpr *inner_func;
2445+
char *outer_func_name;
2446+
char *inner_func_name;
2447+
Node *arg;
2448+
List *inner_args;
2449+
int arg_index = -1;
2450+
Oid result_type;
2451+
bool needs_agtype_cast = false;
2452+
2453+
/* Only optimize FuncExpr nodes */
2454+
if (node == NULL || !IsA(node, FuncExpr))
2455+
{
2456+
return node;
2457+
}
2458+
2459+
outer_func = (FuncExpr *)node;
2460+
2461+
/* Must have exactly one argument */
2462+
if (list_length(outer_func->args) != 1)
2463+
{
2464+
return node;
2465+
}
2466+
2467+
/* Get the function name */
2468+
outer_func_name = get_func_name(outer_func->funcid);
2469+
if (outer_func_name == NULL)
2470+
{
2471+
return node;
2472+
}
2473+
2474+
/* Check if this is an accessor function we can optimize */
2475+
arg = (Node *)linitial(outer_func->args);
2476+
2477+
/* The argument must be a FuncExpr (the build function) */
2478+
if (!IsA(arg, FuncExpr))
2479+
{
2480+
return node;
2481+
}
2482+
2483+
inner_func = (FuncExpr *)arg;
2484+
inner_func_name = get_func_name(inner_func->funcid);
2485+
2486+
if (inner_func_name == NULL)
2487+
{
2488+
return node;
2489+
}
2490+
2491+
inner_args = inner_func->args;
2492+
2493+
/*
2494+
* Check for _agtype_build_vertex(id, label_name, properties)
2495+
* Arguments: 0=id (graphid), 1=label_name (cstring), 2=properties (agtype)
2496+
*
2497+
* Note: We don't optimize age_label() because the label_name is a cstring
2498+
* from _label_name_from_table_oid() and converting it properly to agtype
2499+
* string is non-trivial. The id and properties optimizations are the most
2500+
* impactful for performance anyway.
2501+
*/
2502+
if (strcmp(inner_func_name, "_agtype_build_vertex") == 0 &&
2503+
list_length(inner_args) == 3)
2504+
{
2505+
if (strcmp(outer_func_name, "age_id") == 0)
2506+
{
2507+
/* Extract id (arg 0), needs cast from graphid to agtype */
2508+
arg_index = 0;
2509+
result_type = GRAPHIDOID;
2510+
needs_agtype_cast = true;
2511+
}
2512+
else if (strcmp(outer_func_name, "age_properties") == 0)
2513+
{
2514+
/* Extract properties (arg 2), already agtype */
2515+
arg_index = 2;
2516+
result_type = AGTYPEOID;
2517+
needs_agtype_cast = false;
2518+
}
2519+
/* age_label() is intentionally not optimized - cstring conversion is complex */
2520+
}
2521+
/*
2522+
* Check for _agtype_build_edge(id, startid, endid, label_name, properties)
2523+
* Arguments: 0=id (graphid), 1=start_id (graphid), 2=end_id (graphid),
2524+
* 3=label_name (cstring), 4=properties (agtype)
2525+
*
2526+
* Note: Same as vertex, age_label() is not optimized for edges.
2527+
*/
2528+
else if (strcmp(inner_func_name, "_agtype_build_edge") == 0 &&
2529+
list_length(inner_args) == 5)
2530+
{
2531+
if (strcmp(outer_func_name, "age_id") == 0)
2532+
{
2533+
/* Extract id (arg 0), needs cast from graphid to agtype */
2534+
arg_index = 0;
2535+
result_type = GRAPHIDOID;
2536+
needs_agtype_cast = true;
2537+
}
2538+
else if (strcmp(outer_func_name, "age_start_id") == 0)
2539+
{
2540+
/* Extract start_id (arg 1), needs cast from graphid to agtype */
2541+
arg_index = 1;
2542+
result_type = GRAPHIDOID;
2543+
needs_agtype_cast = true;
2544+
}
2545+
else if (strcmp(outer_func_name, "age_end_id") == 0)
2546+
{
2547+
/* Extract end_id (arg 2), needs cast from graphid to agtype */
2548+
arg_index = 2;
2549+
result_type = GRAPHIDOID;
2550+
needs_agtype_cast = true;
2551+
}
2552+
/* age_label() is intentionally not optimized - cstring conversion is complex */
2553+
else if (strcmp(outer_func_name, "age_properties") == 0)
2554+
{
2555+
/* Extract properties (arg 4), already agtype */
2556+
arg_index = 4;
2557+
result_type = AGTYPEOID;
2558+
needs_agtype_cast = false;
2559+
}
2560+
}
2561+
2562+
/* If we found a pattern to optimize */
2563+
if (arg_index >= 0)
2564+
{
2565+
Node *extracted_arg = (Node *)list_nth(inner_args, arg_index);
2566+
2567+
if (needs_agtype_cast)
2568+
{
2569+
/*
2570+
* For graphid: use graphid_to_agtype() function
2571+
* Currently only graphid needs casting - cstring (for labels)
2572+
* is intentionally not optimized.
2573+
*/
2574+
if (result_type == GRAPHIDOID)
2575+
{
2576+
Oid cast_func_oid;
2577+
FuncExpr *cast_expr;
2578+
2579+
/* Get the graphid_to_agtype function OID */
2580+
cast_func_oid = get_ag_func_oid("graphid_to_agtype", 1,
2581+
GRAPHIDOID);
2582+
2583+
cast_expr = makeFuncExpr(cast_func_oid, AGTYPEOID,
2584+
list_make1(extracted_arg),
2585+
InvalidOid, InvalidOid,
2586+
COERCE_EXPLICIT_CALL);
2587+
cast_expr->location = outer_func->location;
2588+
2589+
return (Node *)cast_expr;
2590+
}
2591+
}
2592+
else
2593+
{
2594+
/* For properties, just return the extracted argument directly */
2595+
return extracted_arg;
2596+
}
2597+
}
2598+
2599+
/* No optimization possible */
2600+
return node;
2601+
}

0 commit comments

Comments
 (0)