From edda2b21473f031207d9a6ce9be3774140332695 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 09:41:37 -0700 Subject: [PATCH 1/7] initial updates to support new extension loader model --- .../workflows/MainDistributionPipeline.yml | 6 ++--- duckdb | 2 +- src/include/parse_functions.hpp | 6 ++--- src/include/parse_tables.hpp | 4 +-- src/include/parse_where.hpp | 8 +++--- src/include/parser_tools_extension.hpp | 2 +- src/parse_functions.cpp | 11 ++++---- src/parse_tables.cpp | 19 +++++++------- src/parse_where.cpp | 13 +++++----- src/parser_tools_extension.cpp | 25 +++++++++---------- 10 files changed, 47 insertions(+), 49 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 9abe192..57e80f5 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -22,8 +22,8 @@ jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 with: - duckdb_version: v1.3.0 - ci_tools_version: v1.3.0 + duckdb_version: v1.4.0 + ci_tools_version: v1.4.0 extension_name: parser_tools diff --git a/duckdb b/duckdb index 71c5c07..b8a06e4 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 71c5c07cdd295e9409c0505885033ae9eb6b5ddd +Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e diff --git a/src/include/parse_functions.hpp b/src/include/parse_functions.hpp index 7ef020a..fe3167f 100644 --- a/src/include/parse_functions.hpp +++ b/src/include/parse_functions.hpp @@ -7,7 +7,7 @@ namespace duckdb { // Forward declarations -class DatabaseInstance; +class ExtensionLoader; struct FunctionResult { std::string function_name; @@ -15,7 +15,7 @@ struct FunctionResult { std::string context; // The context where this function appears (SELECT, WHERE, etc.) }; -void RegisterParseFunctionsFunction(DatabaseInstance &db); -void RegisterParseFunctionScalarFunction(DatabaseInstance &db); +void RegisterParseFunctionsFunction(ExtensionLoader &loader); +void RegisterParseFunctionScalarFunction(ExtensionLoader &loader); } // namespace duckdb \ No newline at end of file diff --git a/src/include/parse_tables.hpp b/src/include/parse_tables.hpp index 00fce3c..36f6e71 100644 --- a/src/include/parse_tables.hpp +++ b/src/include/parse_tables.hpp @@ -33,7 +33,7 @@ static void ExtractTablesFromQueryNode( const duckdb::CommonTableExpressionMap *cte_map = nullptr ); -void RegisterParseTablesFunction(duckdb::DatabaseInstance &db); -void RegisterParseTableScalarFunction(DatabaseInstance &db); +void RegisterParseTablesFunction(duckdb::ExtensionLoader &loader); +void RegisterParseTableScalarFunction(ExtensionLoader &loader); } // namespace duckdb diff --git a/src/include/parse_where.hpp b/src/include/parse_where.hpp index bb32aa3..c249072 100644 --- a/src/include/parse_where.hpp +++ b/src/include/parse_where.hpp @@ -7,7 +7,7 @@ namespace duckdb { // Forward declarations -class DatabaseInstance; +class ExtensionLoader; struct WhereConditionResult { std::string condition; @@ -23,8 +23,8 @@ struct DetailedWhereConditionResult { std::string context; // The context where this condition appears (WHERE, HAVING, etc.) }; -void RegisterParseWhereFunction(DatabaseInstance &db); -void RegisterParseWhereScalarFunction(DatabaseInstance &db); -void RegisterParseWhereDetailedFunction(DatabaseInstance &db); +void RegisterParseWhereFunction(ExtensionLoader &loader); +void RegisterParseWhereScalarFunction(ExtensionLoader &loader); +void RegisterParseWhereDetailedFunction(ExtensionLoader &loader); } // namespace duckdb \ No newline at end of file diff --git a/src/include/parser_tools_extension.hpp b/src/include/parser_tools_extension.hpp index 91f5cf3..d9b74e5 100644 --- a/src/include/parser_tools_extension.hpp +++ b/src/include/parser_tools_extension.hpp @@ -6,7 +6,7 @@ namespace duckdb { class ParserToolsExtension : public Extension { public: - void Load(DuckDB &db) override; + void Load(ExtensionLoader &loader) override; std::string Name() override; std::string Version() const override; }; diff --git a/src/parse_functions.cpp b/src/parse_functions.cpp index 3423964..9886fb0 100644 --- a/src/parse_functions.cpp +++ b/src/parse_functions.cpp @@ -7,7 +7,6 @@ #include "duckdb/parser/expression/window_expression.hpp" #include "duckdb/parser/parsed_expression_iterator.hpp" #include "duckdb/parser/result_modifier.hpp" -#include "duckdb/main/extension_util.hpp" #include "duckdb/function/scalar/nested_functions.hpp" @@ -328,15 +327,15 @@ static void ParseFunctionsScalarFunction_struct(DataChunk &args, ExpressionState // Extension scaffolding // --------------------------------------------------- -void RegisterParseFunctionsFunction(DatabaseInstance &db) { +void RegisterParseFunctionsFunction(ExtensionLoader &loader) { TableFunction tf("parse_functions", {LogicalType::VARCHAR}, ParseFunctionsFunction, ParseFunctionsBind, ParseFunctionsInit); - ExtensionUtil::RegisterFunction(db, tf); + loader.RegisterFunction(tf); } -void RegisterParseFunctionScalarFunction(DatabaseInstance &db) { +void RegisterParseFunctionScalarFunction(ExtensionLoader &loader) { // parse_function_names is a scalar function that returns a list of function names ScalarFunction sf("parse_function_names", {LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR), ParseFunctionNamesScalarFunction); - ExtensionUtil::RegisterFunction(db, sf); + loader.RegisterFunction(sf); // parse_functions_struct is a scalar function that returns a list of structs auto return_type = LogicalType::LIST(LogicalType::STRUCT({ @@ -345,7 +344,7 @@ void RegisterParseFunctionScalarFunction(DatabaseInstance &db) { {"context", LogicalType::VARCHAR} })); ScalarFunction sf_struct("parse_functions", {LogicalType::VARCHAR}, return_type, ParseFunctionsScalarFunction_struct); - ExtensionUtil::RegisterFunction(db, sf_struct); + loader.RegisterFunction(sf_struct); } diff --git a/src/parse_tables.cpp b/src/parse_tables.cpp index 506beb3..c455c62 100644 --- a/src/parse_tables.cpp +++ b/src/parse_tables.cpp @@ -1,12 +1,14 @@ #include "parse_tables.hpp" #include "duckdb.hpp" #include "duckdb/parser/parser.hpp" +#include "duckdb/parser/parser_options.hpp" +#include +#include #include "duckdb/parser/statement/select_statement.hpp" #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/tableref/basetableref.hpp" #include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" -#include "duckdb/main/extension_util.hpp" #include "duckdb/function/scalar/nested_functions.hpp" @@ -152,9 +154,8 @@ static void ExtractTablesFromSQL(const std::string &sql, std::vectortype == StatementType::SELECT_STATEMENT) { @@ -323,19 +324,19 @@ static void IsParsableFunction(DataChunk &args, ExpressionState &state, Vector & // Extension scaffolding // --------------------------------------------------- -void RegisterParseTablesFunction(DatabaseInstance &db) { +void RegisterParseTablesFunction(ExtensionLoader &loader) { TableFunction tf("parse_tables", {LogicalType::VARCHAR}, ParseTablesFunction, ParseTablesBind, ParseTablesInit); - ExtensionUtil::RegisterFunction(db, tf); + loader.RegisterFunction(tf); } -void RegisterParseTableScalarFunction(DatabaseInstance &db) { +void RegisterParseTableScalarFunction(ExtensionLoader &loader) { // parse_table_names is overloaded, allowing for an optional boolean argument // that indicates whether to include CTEs in the result // usage: parse_tables(sql_query [, include_cte]) ScalarFunctionSet set("parse_table_names"); set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR), ParseTablesScalarFunction)); set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::BOOLEAN}, LogicalType::LIST(LogicalType::VARCHAR), ParseTablesScalarFunction)); - ExtensionUtil::RegisterFunction(db, set); + loader.RegisterFunction(set); // parse_tables_struct is a scalar function that returns a list of structs auto return_type = LogicalType::LIST(LogicalType::STRUCT({ @@ -344,11 +345,11 @@ void RegisterParseTableScalarFunction(DatabaseInstance &db) { {"context", LogicalType::VARCHAR} })); ScalarFunction sf("parse_tables", {LogicalType::VARCHAR}, return_type, ParseTablesScalarFunction_struct); - ExtensionUtil::RegisterFunction(db, sf); + loader.RegisterFunction(sf); // is_parsable is a scalar function that returns a boolean indicating whether the SQL query is parsable (no parse errors) ScalarFunction is_parsable("is_parsable", {LogicalType::VARCHAR}, LogicalType::BOOLEAN, IsParsableFunction); - ExtensionUtil::RegisterFunction(db, is_parsable); + loader.RegisterFunction(is_parsable); } } // namespace duckdb diff --git a/src/parse_where.cpp b/src/parse_where.cpp index 7f5ea41..cb2d281 100644 --- a/src/parse_where.cpp +++ b/src/parse_where.cpp @@ -19,7 +19,6 @@ #include "duckdb/parser/expression/positional_reference_expression.hpp" #include "duckdb/parser/expression/parameter_expression.hpp" #include "duckdb/parser/tableref/basetableref.hpp" -#include "duckdb/main/extension_util.hpp" namespace duckdb { @@ -236,19 +235,19 @@ static void ParseWhereScalarFunction(DataChunk &args, ExpressionState &state, Ve }); } -void RegisterParseWhereFunction(DatabaseInstance &db) { +void RegisterParseWhereFunction(ExtensionLoader &loader) { TableFunction tf("parse_where", {LogicalType::VARCHAR}, ParseWhereFunction, ParseWhereBind, ParseWhereInit); - ExtensionUtil::RegisterFunction(db, tf); + loader.RegisterFunction(tf); } -void RegisterParseWhereScalarFunction(DatabaseInstance &db) { +void RegisterParseWhereScalarFunction(ExtensionLoader &loader) { auto return_type = LogicalType::LIST(LogicalType::STRUCT({ {"condition", LogicalType::VARCHAR}, {"table_name", LogicalType::VARCHAR}, {"context", LogicalType::VARCHAR} })); ScalarFunction sf("parse_where", {LogicalType::VARCHAR}, return_type, ParseWhereScalarFunction); - ExtensionUtil::RegisterFunction(db, sf); + loader.RegisterFunction(sf); } static string DetailedExpressionTypeToOperator(ExpressionType type) { @@ -476,9 +475,9 @@ static void ParseWhereDetailedFunction(ClientContext &context, state.row++; } -void RegisterParseWhereDetailedFunction(DatabaseInstance &db) { +void RegisterParseWhereDetailedFunction(ExtensionLoader &loader) { TableFunction tf("parse_where_detailed", {LogicalType::VARCHAR}, ParseWhereDetailedFunction, ParseWhereDetailedBind, ParseWhereDetailedInit); - ExtensionUtil::RegisterFunction(db, tf); + loader.RegisterFunction(tf); } } // namespace duckdb diff --git a/src/parser_tools_extension.cpp b/src/parser_tools_extension.cpp index 385526c..04d79c6 100644 --- a/src/parser_tools_extension.cpp +++ b/src/parser_tools_extension.cpp @@ -22,18 +22,18 @@ namespace duckdb { // --------------------------------------------------- // EXTENSION SCAFFOLDING -static void LoadInternal(DatabaseInstance &instance) { - RegisterParseTablesFunction(instance); - RegisterParseTableScalarFunction(instance); - RegisterParseWhereFunction(instance); - RegisterParseWhereScalarFunction(instance); - RegisterParseWhereDetailedFunction(instance); - RegisterParseFunctionsFunction(instance); - RegisterParseFunctionScalarFunction(instance); +static void LoadInternal(ExtensionLoader &loader) { + RegisterParseTablesFunction(loader); + RegisterParseTableScalarFunction(loader); + RegisterParseWhereFunction(loader); + RegisterParseWhereScalarFunction(loader); + RegisterParseWhereDetailedFunction(loader); + RegisterParseFunctionsFunction(loader); + RegisterParseFunctionScalarFunction(loader); } -void ParserToolsExtension::Load(DuckDB &db) { - LoadInternal(*db.instance); +void ParserToolsExtension::Load(ExtensionLoader &loader) { + LoadInternal(loader); } std::string ParserToolsExtension::Name() { @@ -52,9 +52,8 @@ std::string ParserToolsExtension::Version() const { extern "C" { -DUCKDB_EXTENSION_API void parser_tools_init(duckdb::DatabaseInstance &db) { - duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); +DUCKDB_CPP_EXTENSION_ENTRY(parser_tools, loader) { + duckdb::LoadInternal(loader); } DUCKDB_EXTENSION_API const char *parser_tools_version() { From d3ec818b7e2001a6f0087988391b0820fb570513 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 09:51:39 -0700 Subject: [PATCH 2/7] more 1.4 changes --- src/parser_tools_extension.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/parser_tools_extension.cpp b/src/parser_tools_extension.cpp index 04d79c6..a324102 100644 --- a/src/parser_tools_extension.cpp +++ b/src/parser_tools_extension.cpp @@ -56,11 +56,4 @@ DUCKDB_CPP_EXTENSION_ENTRY(parser_tools, loader) { duckdb::LoadInternal(loader); } -DUCKDB_EXTENSION_API const char *parser_tools_version() { - return duckdb::DuckDB::LibraryVersion(); } -} - -#ifndef DUCKDB_EXTENSION_MAIN -#error DUCKDB_EXTENSION_MAIN not defined -#endif From 234a363755aa148f4fdd0e35cb399f636ee4c67b Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 11:27:30 -0700 Subject: [PATCH 3/7] fix parse_table_names upgrade issues --- src/parse_tables.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/parse_tables.cpp b/src/parse_tables.cpp index c455c62..fdd20c0 100644 --- a/src/parse_tables.cpp +++ b/src/parse_tables.cpp @@ -6,12 +6,12 @@ #include #include "duckdb/parser/statement/select_statement.hpp" #include "duckdb/parser/query_node/select_node.hpp" +#include "duckdb/parser/query_node/cte_node.hpp" #include "duckdb/parser/tableref/basetableref.hpp" #include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/function/scalar/nested_functions.hpp" - namespace duckdb { inline const char *ToString(TableContext context) { @@ -130,7 +130,7 @@ static void ExtractTablesFromQueryNode( if (node.type == QueryNodeType::SELECT_NODE) { auto &select_node = (SelectNode &)node; - // Emit CTE definitions + // Handle CTE definitions for (const auto &entry : select_node.cte_map.map) { results.push_back(TableRefResult{ "", entry.first, TableContext::CTE @@ -144,6 +144,23 @@ static void ExtractTablesFromQueryNode( if (select_node.from_table) { ExtractTablesFromRef(*select_node.from_table, results, context, true, &select_node.cte_map); } + } + // for ctes, we need an extra step to extract the cte body, and then the rest of the statement + // don't actually record any details from this node in the result otherwise it will be duplicated in the recursive calls below. + else if (node.type == QueryNodeType::CTE_NODE) { + auto &cte_node = (CTENode &)node; + + // Extract tables from the CTE query definition + if (cte_node.query) { + ExtractTablesFromQueryNode(*cte_node.query, results, TableContext::From, cte_map); + } + + // Extract tables from the child query (the main query that uses the CTE) + if (cte_node.child) { + // Pass the existing CTE map to the child query + // The current CTE will be available for reference in the child + ExtractTablesFromQueryNode(*cte_node.child, results, context, cte_map); + } } } From 4373345d361f955b634410596d154052ddf46ae3 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 11:38:21 -0700 Subject: [PATCH 4/7] remove duplicate cte parsing --- src/parse_tables.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/parse_tables.cpp b/src/parse_tables.cpp index fdd20c0..2bdc661 100644 --- a/src/parse_tables.cpp +++ b/src/parse_tables.cpp @@ -150,11 +150,6 @@ static void ExtractTablesFromQueryNode( else if (node.type == QueryNodeType::CTE_NODE) { auto &cte_node = (CTENode &)node; - // Extract tables from the CTE query definition - if (cte_node.query) { - ExtractTablesFromQueryNode(*cte_node.query, results, TableContext::From, cte_map); - } - // Extract tables from the child query (the main query that uses the CTE) if (cte_node.child) { // Pass the existing CTE map to the child query From 6dda0bb0fbf814027210ef431f1a095b7a9a4c74 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 11:41:44 -0700 Subject: [PATCH 5/7] simplify comments --- src/parse_tables.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/parse_tables.cpp b/src/parse_tables.cpp index 2bdc661..d902364 100644 --- a/src/parse_tables.cpp +++ b/src/parse_tables.cpp @@ -145,15 +145,11 @@ static void ExtractTablesFromQueryNode( ExtractTablesFromRef(*select_node.from_table, results, context, true, &select_node.cte_map); } } - // for ctes, we need an extra step to extract the cte body, and then the rest of the statement - // don't actually record any details from this node in the result otherwise it will be duplicated in the recursive calls below. + // additional step necessary for duckdb v1.4.0: unwrap CTE node else if (node.type == QueryNodeType::CTE_NODE) { auto &cte_node = (CTENode &)node; - // Extract tables from the child query (the main query that uses the CTE) if (cte_node.child) { - // Pass the existing CTE map to the child query - // The current CTE will be available for reference in the child ExtractTablesFromQueryNode(*cte_node.child, results, context, cte_map); } } From bb8eb02cceb250437ffb0b61e739b8e538857b62 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 11:42:04 -0700 Subject: [PATCH 6/7] fix parse_functions upgrade issues --- src/parse_functions.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parse_functions.cpp b/src/parse_functions.cpp index 9886fb0..394a7d7 100644 --- a/src/parse_functions.cpp +++ b/src/parse_functions.cpp @@ -2,6 +2,7 @@ #include "duckdb.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/parser/statement/select_statement.hpp" +#include "duckdb/parser/query_node/cte_node.hpp" #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/expression/window_expression.hpp" @@ -201,7 +202,14 @@ static void ExtractFunctionsFromQueryNode(const QueryNode &node, std::vector &results) { From 3981edb15f82e41b3955c75840c72e5a9f6bbc8d Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 19 Sep 2025 11:43:04 -0700 Subject: [PATCH 7/7] re-enable next build --- .github/workflows/MainDistributionPipeline.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 57e80f5..6d0a4a9 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -12,13 +12,13 @@ concurrency: cancel-in-progress: true jobs: -# duckdb-next-build: -# name: Build extension binaries -# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main -# with: -# duckdb_version: main -# ci_tools_version: main -# extension_name: parser_tools + duckdb-next-build: + name: Build extension binaries + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main + with: + duckdb_version: main + ci_tools_version: main + extension_name: parser_tools duckdb-stable-build: name: Build extension binaries