Skip to content

Commit 0e2dd88

Browse files
Added ANTLR parse tree persistent caching for procedures
Task: BABEL-6037 Signed-off-by: Manisha Deshpande <mmdeshp@amazon.com>
1 parent c817e0b commit 0e2dd88

File tree

8 files changed

+1535
-6
lines changed

8 files changed

+1535
-6
lines changed

contrib/babelfishpg_tsql/sql/ownership.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ CREATE TABLE sys.babelfish_function_ext (
4141
create_date SYS.DATETIME NOT NULL,
4242
modify_date SYS.DATETIME NOT NULL,
4343
definition sys.NTEXT DEFAULT NULL,
44+
antlr_parse_tree JSONB DEFAULT NULL, -- JSONB serialized ANTLR parse tree for caching
4445
PRIMARY KEY(funcname, nspname, funcsignature)
4546
);
4647
GRANT SELECT ON sys.babelfish_function_ext TO PUBLIC;

contrib/babelfishpg_tsql/src/catalog.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ typedef FormData_bbf_servers_def *Form_bbf_servers_def;
277277
#define Anum_bbf_function_ext_create_date 8
278278
#define Anum_bbf_function_ext_modify_date 9
279279
#define Anum_bbf_function_ext_definition 10
280-
#define BBF_FUNCTION_EXT_NUM_COLS 10
280+
#define Anum_bbf_function_ext_antlr_parse_tree 11
281+
#define BBF_FUNCTION_EXT_NUM_COLS 11
281282
#define FLAG_IS_ANSI_NULLS_ON (1<<0)
282283
#define FLAG_USES_QUOTED_IDENTIFIER (1<<1)
283284
#define FLAG_CREATED_WITH_RECOMPILE (1<<2)

contrib/babelfishpg_tsql/src/hooks.c

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
#include "bbf_parallel_query.h"
102102
#include "extendedproperty.h"
103103
#include "utils/xml.h"
104+
#include "pltsql_serialize_jsonb.h"
104105

105106
#ifdef USE_LIBXML
106107
#include <libxml/tree.h>
@@ -4102,6 +4103,86 @@ pltsql_store_func_default_positions(ObjectAddress address, List *parameters, con
41024103
new_record_nulls[Anum_bbf_function_ext_definition - 1] = true;
41034104
new_record_replaces[Anum_bbf_function_ext_default_positions - 1] = true;
41044105

4106+
/*
4107+
* Store serialized ANTLR parse result for cross-session caching.
4108+
* Look up the compiled function and serialize its parse tree.
4109+
*/
4110+
{
4111+
PLtsql_func_hashkey hashkey;
4112+
PLtsql_function *function = NULL;
4113+
char *serialized_parse_tree = NULL;
4114+
PLtsql_serial_status status;
4115+
4116+
/* [TODO]: complete hashkey logic
4117+
* Build a hashkey to look up the function in the hash table.
4118+
* Note: This uses a simplified hashkey that may not match all
4119+
* function overloads perfectly (e.g., default parameter values).
4120+
* This is acceptable since we're just trying to find the compiled
4121+
* function to serialize its parse tree.
4122+
*/
4123+
MemSet(&hashkey, 0, sizeof(PLtsql_func_hashkey));
4124+
hashkey.funcOid = address.objectId;
4125+
hashkey.isTrigger = false;
4126+
hashkey.isEventTrigger = false;
4127+
hashkey.trigOid = 0;
4128+
hashkey.inputCollation = 0;
4129+
4130+
if (form_proctup->pronargs > 0)
4131+
{
4132+
/* Copy argument types from proc tuple */
4133+
memcpy(hashkey.argtypes, form_proctup->proargtypes.values,
4134+
form_proctup->pronargs * sizeof(Oid));
4135+
}
4136+
4137+
/* Look up the compiled function in the hash table */
4138+
function = pltsql_HashTableLookup(&hashkey);
4139+
4140+
if (function != NULL && function->action != NULL)
4141+
{
4142+
//[TODO] Save engine version in additional column
4143+
/* Serialize the parse tree to JSON string */
4144+
serialized_parse_tree = pltsql_serialize_stmt_block_jsonb(function->action, &status);
4145+
4146+
if (status == PLTSQL_SERIAL_SUPPORTED && serialized_parse_tree != NULL)
4147+
{
4148+
/* Convert JSON string to JSONB datum and store directly */
4149+
new_record[Anum_bbf_function_ext_antlr_parse_tree - 1] = DirectFunctionCall1(jsonb_in,
4150+
CStringGetDatum(serialized_parse_tree));
4151+
new_record_nulls[Anum_bbf_function_ext_antlr_parse_tree - 1] = false;
4152+
new_record_replaces[Anum_bbf_function_ext_antlr_parse_tree - 1] = true;
4153+
4154+
elog(DEBUG1, "Stored cached parse result for function %u (%zu bytes JSON)",
4155+
address.objectId, strlen(serialized_parse_tree));
4156+
}
4157+
else
4158+
{
4159+
/* Serialization failed or unsupported - skip caching */
4160+
if (status == PLTSQL_SERIAL_UNSUPPORTED)
4161+
{
4162+
elog(DEBUG1, "Parse result caching skipped for function %u: contains unsupported node types",
4163+
address.objectId);
4164+
}
4165+
else if (status == PLTSQL_SERIAL_ERROR)
4166+
{
4167+
elog(DEBUG1, "Failed to serialize parse result for function %u",
4168+
address.objectId);
4169+
}
4170+
/* Set to NULL */
4171+
new_record_nulls[Anum_bbf_function_ext_antlr_parse_tree - 1] = true;
4172+
new_record_replaces[Anum_bbf_function_ext_antlr_parse_tree - 1] = true;
4173+
}
4174+
4175+
if (serialized_parse_tree)
4176+
pfree(serialized_parse_tree);
4177+
}
4178+
else
4179+
{
4180+
/* Function not found in hash table or no action - set to NULL */
4181+
new_record_nulls[Anum_bbf_function_ext_antlr_parse_tree - 1] = true;
4182+
new_record_replaces[Anum_bbf_function_ext_antlr_parse_tree - 1] = true;
4183+
}
4184+
}
4185+
41054186
oldtup = get_bbf_function_tuple_from_proctuple(proctup);
41064187

41074188
if (HeapTupleIsValid(oldtup))
@@ -4139,6 +4220,81 @@ pltsql_store_func_default_positions(ObjectAddress address, List *parameters, con
41394220
table_close(bbf_function_ext_rel, RowExclusiveLock);
41404221
}
41414222

4223+
/*
4224+
* pltsql_restore_func_parse_result
4225+
* Attempt to restore a cached parse result from babelfish_function_ext
4226+
*
4227+
* Retrieves a previously serialized ANTLR parse tree from the catalog and
4228+
* deserializes it back into a PLtsql_stmt_block structure. This allows
4229+
* procedures to skip ANTLR parsing on subsequent executions in new sessions.
4230+
*
4231+
* Parameters:
4232+
* proctup - HeapTuple from pg_proc for the function
4233+
*
4234+
* Returns:
4235+
* PLtsql_stmt_block * - Deserialized parse tree if cache hit
4236+
* NULL - If no cache entry exists, deserialization fails, or during restore
4237+
*/
4238+
PLtsql_stmt_block *
4239+
pltsql_restore_func_parse_result(HeapTuple proctup)
4240+
{
4241+
HeapTuple bbffunctuple;
4242+
Datum cached_parse_tree_datum;
4243+
PLtsql_stmt_block *deserialized;
4244+
PLtsql_serial_status status;
4245+
bool isnull;
4246+
Jsonb *jsonb_data;
4247+
4248+
/* Disallow during restore */
4249+
if (babelfish_dump_restore)
4250+
return NULL;
4251+
4252+
/* 1. Get babelfish_function_ext tuple using existing helper */
4253+
bbffunctuple = get_bbf_function_tuple_from_proctuple(proctup);
4254+
4255+
if (!HeapTupleIsValid(bbffunctuple))
4256+
{
4257+
elog(DEBUG1, "pltsql_restore_func_parse_result: No babelfish_function_ext entry found");
4258+
return NULL;
4259+
}
4260+
4261+
/* 2. Fetch cached_parse_tree column (JSONB type) */
4262+
cached_parse_tree_datum = SysCacheGetAttr(
4263+
PROCNAMENSPSIGNATURE,
4264+
bbffunctuple,
4265+
Anum_bbf_function_ext_antlr_parse_tree,
4266+
&isnull
4267+
);
4268+
4269+
4270+
if (isnull)
4271+
{
4272+
elog(DEBUG1, "pltsql_restore_func_parse_result: No cached parse result in catalog");
4273+
return NULL;
4274+
}
4275+
4276+
/* 3. Get Jsonb pointer directly - detoasts automatically */
4277+
jsonb_data = DatumGetJsonbP(cached_parse_tree_datum);
4278+
4279+
elog(DEBUG1, "pltsql_restore_func_parse_result: Found cached parse result");
4280+
4281+
/* 4. Deserialize directly from Jsonb - efficient, no string conversion */
4282+
deserialized = pltsql_deserialize_stmt_block_from_jsonb(jsonb_data, &status);
4283+
4284+
if (status != PLTSQL_SERIAL_SUPPORTED)
4285+
{
4286+
elog(DEBUG1, "pltsql_restore_func_parse_result: Deserialization failed with status %d", status);
4287+
return NULL;
4288+
}
4289+
4290+
elog(DEBUG1, "pltsql_restore_func_parse_result: Deserialization succeeded (cmd_type=%d, lineno=%d)",
4291+
deserialized->cmd_type, deserialized->lineno);
4292+
4293+
/* Return deserialized result */
4294+
heap_freetuple(bbffunctuple);
4295+
return deserialized;
4296+
}
4297+
41424298
/*
41434299
* Update 'function_args' in 'sys.babelfish_schema_permissions'
41444300
*/

contrib/babelfishpg_tsql/src/hooks.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "tcop/cmdtag.h"
77
#include "utils/pg_locale.h"
88
#include "utils/xml.h"
9+
#include "pltsql.h"
910

1011
extern IsExtendedCatalogHookType PrevIsExtendedCatalogHook;
1112
extern IsToastRelationHookType PrevIsToastRelationHook;
@@ -25,6 +26,7 @@ extern void pltsql_store_func_default_positions(ObjectAddress address,
2526
const char *queryString,
2627
int origname_location,
2728
bool with_recompile);
29+
extern PLtsql_stmt_block *pltsql_restore_func_parse_result(HeapTuple proctup);
2830
extern void alter_bbf_schema_permissions_catalog(ObjectWithArgs *owa,
2931
List *parameters,
3032
int objtypeInt);

contrib/babelfishpg_tsql/src/pl_comp.c

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141

4242
#include "pltsql.h"
4343
#include "pltsql-2.h"
44+
#include "pltsql_serialize_jsonb.h"
45+
#include "hooks.h"
4446
#include "analyzer.h"
4547
#include "catalog.h"
4648
#include "codegen.h"
@@ -137,7 +139,7 @@ static void pltsql_resolve_polymorphic_argtypes(int numargs,
137139
Oid *argtypes, char *argmodes,
138140
Node *call_expr, bool forValidator,
139141
const char *proname);
140-
static PLtsql_function *pltsql_HashTableLookup(PLtsql_func_hashkey *func_key);
142+
PLtsql_function *pltsql_HashTableLookup(PLtsql_func_hashkey *func_key);
141143
static void pltsql_HashTableInsert(PLtsql_function *function,
142144
PLtsql_func_hashkey *func_key);
143145
static void pltsql_HashTableDelete(PLtsql_function *function);
@@ -251,7 +253,7 @@ pltsql_compile(FunctionCallInfo fcinfo, bool forValidator)
251253
forValidator);
252254

253255
/*
254-
* Do the hard part.
256+
* Compile the function (will attempt to use cached parse result if available).
255257
*/
256258
function = do_compile(fcinfo, procTup, function,
257259
&hashkey, forValidator);
@@ -510,6 +512,14 @@ do_compile(FunctionCallInfo fcinfo,
510512
function->fn_input_collation,
511513
NULL);
512514

515+
// /* Debug logging for populateTaxInvDetails parameters */
516+
// if (function->fn_signature &&
517+
// strstr(function->fn_signature, "populatetaxinvdetails") != NULL)
518+
// {
519+
// elog(NOTICE, "Creating param %d: argtypeid=%u, typmod=%d, argdtype->typoid=%u, argdtype->typname=%s",
520+
// i, argtypeid, typmod, argdtype->typoid, argdtype->typname);
521+
// }
522+
513523
/* Disallow pseudotype argument */
514524
/* (note we already replaced polymorphic types) */
515525
/* (build_variable would do this, but wrong message) */
@@ -907,7 +917,30 @@ do_compile(FunctionCallInfo fcinfo,
907917
* Now parse the function's text
908918
*/
909919
{
910-
ANTLR_result result = antlr_parser_cpp(proc_source);
920+
ANTLR_result result;
921+
PLtsql_stmt_block *cached_parse_result = NULL;
922+
923+
/*
924+
* Try to restore cached parse result from previous compilation.
925+
* This allows us to skip expensive ANTLR parsing.
926+
* Skip during validation (forValidator=true) as we're just checking syntax.
927+
*/
928+
if (!forValidator)
929+
{
930+
cached_parse_result = pltsql_restore_func_parse_result(procTup);
931+
932+
if (cached_parse_result) //[TODO] Retrieve antlr parse tree engine version and compare with current engine version
933+
{
934+
elog(DEBUG1, "do_compile: Using cached parse result, skipping ANTLR parsing");
935+
pltsql_parse_result = cached_parse_result;
936+
parse_rc = 0;
937+
// [TODO]: uncomment to execute deserialized parse result
938+
// goto skip_antlr_parsing;
939+
}
940+
}
941+
942+
/* No cache hit - do ANTLR parsing */
943+
result = antlr_parser_cpp(proc_source);
911944

912945
if (result.success)
913946
{
@@ -919,7 +952,8 @@ do_compile(FunctionCallInfo fcinfo,
919952
parse_rc = 1; /* invalid input */
920953
}
921954
}
922-
955+
// skip_antlr_parsing:
956+
/* Continue with normal flow */
923957
if (parse_rc != 0)
924958
elog(ERROR, "pltsql parser returned %d", parse_rc);
925959
function->action = pltsql_parse_result;
@@ -3148,7 +3182,7 @@ pltsql_HashTableInit(void)
31483182
HASH_ELEM | HASH_BLOBS);
31493183
}
31503184

3151-
static PLtsql_function *
3185+
PLtsql_function *
31523186
pltsql_HashTableLookup(PLtsql_func_hashkey *func_key)
31533187
{
31543188
pltsql_HashEnt *hentry;

contrib/babelfishpg_tsql/src/pltsql.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,6 +2083,7 @@ extern PLtsql_condition *pltsql_parse_err_condition(char *condname);
20832083
extern void pltsql_adddatum(PLtsql_datum *newdatum);
20842084
extern int pltsql_add_initdatums(int **varnos);
20852085
extern void pltsql_HashTableInit(void);
2086+
extern PLtsql_function *pltsql_HashTableLookup(PLtsql_func_hashkey *func_key);
20862087
extern void reset_cache(void);
20872088

20882089
/*

0 commit comments

Comments
 (0)