Skip to content

Commit 388959c

Browse files
Merge branch 'main' into feature/column-extraction
2 parents f0d62c0 + 0c74f1b commit 388959c

17 files changed

+545
-70
lines changed

.github/workflows/MainDistributionPipeline.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@ concurrency:
1212
cancel-in-progress: true
1313

1414
jobs:
15-
# duckdb-next-build:
16-
# name: Build extension binaries
17-
# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
18-
# with:
19-
# duckdb_version: main
20-
# ci_tools_version: main
21-
# extension_name: parser_tools
15+
duckdb-next-build:
16+
name: Build extension binaries
17+
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
18+
with:
19+
duckdb_version: main
20+
ci_tools_version: main
21+
extension_name: parser_tools
2222

2323
duckdb-stable-build:
2424
name: Build extension binaries
25-
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0
25+
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0
2626
with:
27-
duckdb_version: v1.3.0
28-
ci_tools_version: v1.3.0
27+
duckdb_version: v1.4.0
28+
ci_tools_version: v1.4.0
2929
extension_name: parser_tools

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@ set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
99
project(${TARGET_NAME})
1010
include_directories(src/include)
1111

12-
set(EXTENSION_SOURCES
12+
set(EXTENSION_SOURCES
1313
src/parser_tools_extension.cpp
1414
src/parse_tables.cpp
1515
src/parse_where.cpp
1616
src/parse_functions.cpp
1717
src/parse_columns.cpp
18+
src/parse_statements.cpp
1819
)
1920

2021
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})

README.md

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@ An experimental DuckDB extension that exposes functionality from DuckDB's native
44

55
## Overview
66

7-
`parser_tools` is a DuckDB extension designed to provide SQL parsing capabilities within the database. It allows you to analyze SQL queries and extract structural information directly in SQL. This extension provides parsing functions for tables, WHERE clauses, and function calls (see [Functions](#functions) below).
7+
`parser_tools` is a DuckDB extension designed to provide SQL parsing capabilities within the database. It allows you to analyze SQL queries and extract structural information directly in SQL. This extension provides parsing functions for tables, columns, WHERE clauses, function calls, and statements.
88

99
## Features
1010

1111
- **Extract table references** from a SQL query with context information (e.g. `FROM`, `JOIN`, etc.)
1212
- **Extract function calls** from a SQL query with context information (e.g. `SELECT`, `WHERE`, `HAVING`, etc.)
1313
- **Extract column references** from a SQL query with comprehensive dependency tracking
1414
- **Parse WHERE clauses** to extract conditions and operators
15+
- **Parse multi-statement SQL** to extract individual statements or count the number of statements
1516
- Support for **window functions**, **nested functions**, and **CTEs**
1617
- **Alias chain tracking** for complex column dependencies
1718
- **Nested struct field access** parsing (e.g., `table.column.field.subfield`)
@@ -106,7 +107,7 @@ Context helps identify where elements are used in the query.
106107

107108
## Functions
108109

109-
This extension provides parsing functions for tables, functions, columns, and WHERE clauses. Each category includes both table functions (for detailed results) and scalar functions (for programmatic use).
110+
This extension provides parsing functions for tables, functions, columns, statements, and WHERE clauses. Each category includes both table functions (for detailed results) and scalar functions (for programmatic use).
110111

111112
In general, errors (e.g. a Parse Exception) are not exposed to the user; the function returns an empty result instead. This simplifies batch processing. When you need to check validity, use [is_parsable](#is_parsablesql_query--scalar-function).
112113

@@ -395,6 +396,92 @@ FROM (VALUES
395396
└───────────────────────────────────────────────┴────────┘
396397
```
397398

399+
---
400+
401+
### Statement Parsing Functions
402+
403+
These functions parse multi-statement SQL strings and extract individual statements or count them.
404+
405+
#### `parse_statements(sql_query)` – Table Function
406+
407+
Parses a SQL string containing multiple statements and returns each statement as a separate row.
408+
409+
##### Usage
410+
```sql
411+
SELECT * FROM parse_statements('SELECT 42; SELECT 43;');
412+
```
413+
414+
##### Returns
415+
A table with:
416+
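- `statement`: the SQL statement text, as re-serialized by the parser (formatting and some function names may differ from the input, e.g. `count(*)` becomes `count_star()`)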
417+
418+
##### Example
419+
```sql
420+
SELECT * FROM parse_statements($$
421+
SELECT * FROM users WHERE active = true;
422+
INSERT INTO log VALUES ('query executed');
423+
SELECT count(*) FROM transactions;
424+
$$);
425+
```
426+
427+
| statement |
428+
|-----------|
429+
| SELECT * FROM users WHERE (active = true) |
430+
| INSERT INTO log (VALUES ('query executed')) |
431+
| SELECT count_star() FROM transactions |
432+
433+
---
434+
435+
#### `parse_statements(sql_query)` – Scalar Function
436+
437+
Returns a list of statement strings from a multi-statement SQL query.
438+
439+
##### Usage
440+
```sql
441+
SELECT parse_statements('SELECT 42; SELECT 43;');
442+
----
443+
[SELECT 42, SELECT 43]
444+
```
445+
446+
##### Returns
447+
A list of strings, each being a SQL statement.
448+
449+
##### Example
450+
```sql
451+
SELECT parse_statements('SELECT 1; INSERT INTO test VALUES (2); SELECT 3;');
452+
----
453+
[SELECT 1, 'INSERT INTO test (VALUES (2))', SELECT 3]
454+
```
455+
456+
---
457+
458+
#### `num_statements(sql_query)` – Scalar Function
459+
460+
Returns the number of statements in a multi-statement SQL query.
461+
462+
##### Usage
463+
```sql
464+
SELECT num_statements('SELECT 42; SELECT 43;');
465+
----
466+
2
467+
```
468+
469+
##### Returns
470+
An integer count of the number of SQL statements.
471+
472+
##### Example
473+
```sql
474+
SELECT num_statements($$
475+
WITH cte AS (SELECT 1) SELECT * FROM cte;
476+
UPDATE users SET last_seen = now();
477+
SELECT count(*) FROM users;
478+
DELETE FROM temp_data;
479+
$$);
480+
----
481+
4
482+
```
483+
484+
---
398485

399486
## Development
400487

duckdb

Submodule duckdb updated 3475 files

src/include/parse_functions.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77
namespace duckdb {
88

99
// Forward declarations
10-
class DatabaseInstance;
10+
class ExtensionLoader;
1111

1212
struct FunctionResult {
1313
std::string function_name;
1414
std::string schema;
1515
std::string context; // The context where this function appears (SELECT, WHERE, etc.)
1616
};
1717

18-
void RegisterParseFunctionsFunction(DatabaseInstance &db);
19-
void RegisterParseFunctionScalarFunction(DatabaseInstance &db);
18+
void RegisterParseFunctionsFunction(ExtensionLoader &loader);
19+
void RegisterParseFunctionScalarFunction(ExtensionLoader &loader);
2020

2121
} // namespace duckdb

src/include/parse_statements.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once
2+
3+
#include "duckdb.hpp"
4+
#include <string>
5+
#include <vector>
6+
7+
namespace duckdb {
8+
9+
// Forward declarations
10+
class ExtensionLoader;
11+
12+
struct StatementResult {
13+
std::string statement;
14+
};
15+
16+
void RegisterParseStatementsFunction(ExtensionLoader &loader);
17+
void RegisterParseStatementsScalarFunction(ExtensionLoader &loader);
18+
19+
} // namespace duckdb

src/include/parse_tables.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ static void ExtractTablesFromQueryNode(
3333
const duckdb::CommonTableExpressionMap *cte_map = nullptr
3434
);
3535

36-
void RegisterParseTablesFunction(duckdb::DatabaseInstance &db);
37-
void RegisterParseTableScalarFunction(DatabaseInstance &db);
36+
void RegisterParseTablesFunction(duckdb::ExtensionLoader &loader);
37+
void RegisterParseTableScalarFunction(ExtensionLoader &loader);
3838

3939
} // namespace duckdb

src/include/parse_where.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
namespace duckdb {
88

99
// Forward declarations
10-
class DatabaseInstance;
10+
class ExtensionLoader;
1111

1212
struct WhereConditionResult {
1313
std::string condition;
@@ -23,8 +23,8 @@ struct DetailedWhereConditionResult {
2323
std::string context; // The context where this condition appears (WHERE, HAVING, etc.)
2424
};
2525

26-
void RegisterParseWhereFunction(DatabaseInstance &db);
27-
void RegisterParseWhereScalarFunction(DatabaseInstance &db);
28-
void RegisterParseWhereDetailedFunction(DatabaseInstance &db);
26+
void RegisterParseWhereFunction(ExtensionLoader &loader);
27+
void RegisterParseWhereScalarFunction(ExtensionLoader &loader);
28+
void RegisterParseWhereDetailedFunction(ExtensionLoader &loader);
2929

3030
} // namespace duckdb

src/include/parser_tools_extension.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ namespace duckdb {
66

77
class ParserToolsExtension : public Extension {
88
public:
9-
void Load(DuckDB &db) override;
9+
void Load(ExtensionLoader &loader) override;
1010
std::string Name() override;
1111
std::string Version() const override;
1212
};

src/parse_functions.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
#include "duckdb.hpp"
33
#include "duckdb/parser/parser.hpp"
44
#include "duckdb/parser/statement/select_statement.hpp"
5+
#include "duckdb/parser/query_node/cte_node.hpp"
56
#include "duckdb/parser/query_node/select_node.hpp"
67
#include "duckdb/parser/expression/function_expression.hpp"
78
#include "duckdb/parser/expression/window_expression.hpp"
89
#include "duckdb/parser/parsed_expression_iterator.hpp"
910
#include "duckdb/parser/result_modifier.hpp"
10-
#include "duckdb/main/extension_util.hpp"
1111
#include "duckdb/function/scalar/nested_functions.hpp"
1212

1313

@@ -202,7 +202,14 @@ static void ExtractFunctionsFromQueryNode(const QueryNode &node, std::vector<Fun
202202
}
203203
}
204204
}
205-
}
205+
// additional step necessary for duckdb v1.4.0: unwrap CTE node
206+
} else if (node.type == QueryNodeType::CTE_NODE) {
207+
auto &cte_node = (CTENode &)node;
208+
209+
if (cte_node.child) {
210+
ExtractFunctionsFromQueryNode(*cte_node.child, results);
211+
}
212+
}
206213
}
207214

208215
static void ExtractFunctionsFromSQL(const std::string &sql, std::vector<FunctionResult> &results) {
@@ -328,15 +335,15 @@ static void ParseFunctionsScalarFunction_struct(DataChunk &args, ExpressionState
328335
// Extension scaffolding
329336
// ---------------------------------------------------
330337

331-
void RegisterParseFunctionsFunction(DatabaseInstance &db) {
338+
void RegisterParseFunctionsFunction(ExtensionLoader &loader) {
332339
TableFunction tf("parse_functions", {LogicalType::VARCHAR}, ParseFunctionsFunction, ParseFunctionsBind, ParseFunctionsInit);
333-
ExtensionUtil::RegisterFunction(db, tf);
340+
loader.RegisterFunction(tf);
334341
}
335342

336-
void RegisterParseFunctionScalarFunction(DatabaseInstance &db) {
343+
void RegisterParseFunctionScalarFunction(ExtensionLoader &loader) {
337344
// parse_function_names is a scalar function that returns a list of function names
338345
ScalarFunction sf("parse_function_names", {LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR), ParseFunctionNamesScalarFunction);
339-
ExtensionUtil::RegisterFunction(db, sf);
346+
loader.RegisterFunction(sf);
340347

341348
// parse_functions_struct is a scalar function that returns a list of structs
342349
auto return_type = LogicalType::LIST(LogicalType::STRUCT({
@@ -345,7 +352,7 @@ void RegisterParseFunctionScalarFunction(DatabaseInstance &db) {
345352
{"context", LogicalType::VARCHAR}
346353
}));
347354
ScalarFunction sf_struct("parse_functions", {LogicalType::VARCHAR}, return_type, ParseFunctionsScalarFunction_struct);
348-
ExtensionUtil::RegisterFunction(db, sf_struct);
355+
loader.RegisterFunction(sf_struct);
349356
}
350357

351358

0 commit comments

Comments
 (0)