Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
acda61a
Update duckdb to ib6513473ba48f46c39c8a9d506e4ad32b7a6f2ae commmit
mkaruza Jan 20, 2025
a2f32a6
Update to DuckDB 1.2
mkaruza Jan 20, 2025
15d5300
Update to DuckDB 1.2 and fix regression tests
mkaruza Jan 20, 2025
09986a1
Disable regression test that work with extensions
mkaruza Jan 20, 2025
3ae7f5f
Formating
mkaruza Jan 20, 2025
9556f29
pytest fix
mkaruza Jan 20, 2025
b16aed9
Iterate recursively push down filter and construct them
mkaruza Jan 23, 2025
912d09f
Update duckdb to `V1.2 histrionicus` commit
mkaruza Jan 23, 2025
f556695
Update DUCKDB_VERSION
mkaruza Jan 23, 2025
c0b9a32
Don't throw if unsupported filter is optional filter
mkaruza Jan 27, 2025
9abe5b4
Formating
mkaruza Jan 27, 2025
b912069
Missing parenthesis so formating back
mkaruza Jan 27, 2025
f075a41
Add DYNAMIC_FILTER and handle as switch in other unsupported filter
mkaruza Jan 27, 2025
22d3048
Merge remote-tracking branch 'origin' into duckdb-1.2
JelteF Feb 4, 2025
264da7a
Rename to is_inside_optional_filter
JelteF Feb 4, 2025
2e0597a
Correctly pass on is_inside_optional_filter
JelteF Feb 4, 2025
9c2a42b
Update duckdb to latest v1.2-histrionicus branch commit
JelteF Feb 4, 2025
8030edd
Downgrade to latest nightly build
JelteF Feb 4, 2025
f47c105
Disable tests that fail due to extensions not being available
JelteF Feb 4, 2025
47eef96
Update to official v1.2.0 release
JelteF Feb 5, 2025
3aa062f
Enable extension tests
JelteF Feb 5, 2025
5be0124
Updated to Duckdb 1.2 TAG commit
mkaruza Feb 6, 2025
b1c162e
Only format C/C++ files with clang-format
JelteF Feb 6, 2025
0889841
Make clang-format happy
JelteF Feb 6, 2025
fa5f9ec
Update non_superuser test output for 1.2.0
JelteF Feb 6, 2025
753b0ef
Use duckdgq as an example community extension, avro is not released f…
JelteF Feb 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ OBJS += $(subst .c,.o, $(C_SRCS))
# set to `make` to disable ninja
DUCKDB_GEN ?= ninja
# used to know what version of extensions to download
DUCKDB_VERSION = v1.1.3
DUCKDB_VERSION = v1.2.0
# duckdb build tweaks
DUCKDB_CMAKE_VARS = -DBUILD_SHELL=0 -DBUILD_PYTHON=0 -DBUILD_UNITTESTS=0
# set to 1 to disable asserts in DuckDB. This is particularly useful in combinition with MotherDuck.
Expand Down
7 changes: 5 additions & 2 deletions include/pgduckdb/scan/postgres_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ struct PostgresScanGlobalState : public duckdb::GlobalTableFunctionState {
}
void ConstructTableScanQuery(const duckdb::TableFunctionInitInput &input);

private:
int ExtractQueryFilters(duckdb::TableFilter *filter, const char *column_name, duckdb::string &filters,
bool is_optional_filter_parent);

public:
Snapshot snapshot;
Relation rel;
Expand Down Expand Up @@ -50,7 +54,6 @@ struct PostgresScanLocalState : public duckdb::LocalTableFunctionState {
struct PostgresScanFunctionData : public duckdb::TableFunctionData {
PostgresScanFunctionData(Relation rel, uint64_t cardinality, Snapshot snapshot);
~PostgresScanFunctionData() override;

duckdb::vector<duckdb::string> complex_filters;
Relation rel;
uint64_t cardinality;
Expand All @@ -74,7 +77,7 @@ struct PostgresScanTableFunction : public duckdb::TableFunction {

static duckdb::unique_ptr<duckdb::NodeStatistics> PostgresScanCardinality(duckdb::ClientContext &context,
const duckdb::FunctionData *data);
static std::string ToString(const duckdb::FunctionData *bind_data);
static duckdb::InsertionOrderPreservingMap<duckdb::string> ToString(duckdb::TableFunctionToStringInput &input);
};

} // namespace pgduckdb
8 changes: 4 additions & 4 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
break;
}
case TIMESTAMPTZOID: {
duckdb::timestamp_t timestamp = value.GetValue<duckdb::timestamp_t>();
duckdb::timestamp_tz_t timestamp = value.GetValue<duckdb::timestamp_tz_t>();
slot->tts_values[col] = timestamp.value - pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
break;
}
Expand Down Expand Up @@ -1187,7 +1187,7 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) {
return duckdb::Value::TIMESTAMP(duckdb::timestamp_t(DatumGetTimestamp(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
case TIMESTAMPTZOID:
return duckdb::Value::TIMESTAMPTZ(
duckdb::timestamp_t(DatumGetTimestampTz(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
duckdb::timestamp_tz_t(DatumGetTimestampTz(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
case FLOAT4OID:
return duckdb::Value::FLOAT(DatumGetFloat4(value));
case FLOAT8OID:
Expand Down Expand Up @@ -1254,8 +1254,8 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i
result, duckdb::timestamp_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
break;
case duckdb::LogicalTypeId::TIMESTAMP_TZ:
Append<duckdb::timestamp_t>(
result, duckdb::timestamp_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
Append<duckdb::timestamp_tz_t>(
result, duckdb::timestamp_tz_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
break;
case duckdb::LogicalTypeId::FLOAT:
Append<float>(result, DatumGetFloat4(value), offset);
Expand Down
96 changes: 76 additions & 20 deletions src/scan/postgres_scan.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include "duckdb/planner/filter/optional_filter.hpp"

#include "pgduckdb/scan/postgres_scan.hpp"
#include "pgduckdb/scan/postgres_table_reader.hpp"
#include "pgduckdb/pgduckdb_types.hpp"
Expand All @@ -7,12 +9,70 @@
#include "pgduckdb/pgduckdb_process_lock.hpp"
#include "pgduckdb/logger.hpp"

#include <numeric> // std::accumulate

namespace pgduckdb {

//
// PostgresScanGlobalState
//

static duckdb::string
FilterJoin(duckdb::vector<duckdb::string> &filters, duckdb::string &&delimiter) {
return std::accumulate(filters.begin() + 1, filters.end(), filters[0],
[&delimiter](duckdb::string l, duckdb::string r) { return l + delimiter + r; });
}

int
PostgresScanGlobalState::ExtractQueryFilters(duckdb::TableFilter *filter, const char *column_name,
duckdb::string &query_filters, bool is_optional_filter_parent) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: "parent" seems like the wrong naming imo. Maybe one of these names instead:

Suggested change
duckdb::string &query_filters, bool is_optional_filter_parent) {
duckdb::string &query_filters, bool is_optional_filter_child) {
Suggested change
duckdb::string &query_filters, bool is_optional_filter_parent) {
duckdb::string &query_filters, bool is_inside_optional_filter) {

switch (filter->filter_type) {
case duckdb::TableFilterType::CONSTANT_COMPARISON:
case duckdb::TableFilterType::IS_NULL:
case duckdb::TableFilterType::IS_NOT_NULL:
case duckdb::TableFilterType::IN_FILTER: {
query_filters += filter->ToString(column_name).c_str();
return 1;
}
case duckdb::TableFilterType::CONJUNCTION_OR:
case duckdb::TableFilterType::CONJUNCTION_AND: {
auto conjuction_filter = reinterpret_cast<duckdb::ConjunctionFilter *>(filter);
duckdb::vector<std::string> conjuction_child_filters;
for (idx_t i = 0; i < conjuction_filter->child_filters.size(); i++) {
std::string child_filter;
if (ExtractQueryFilters(conjuction_filter->child_filters[i].get(), column_name, child_filter, false)) {
conjuction_child_filters.emplace_back(child_filter);
}
}
duckdb::string conjuction_delimiter =
filter->filter_type == duckdb::TableFilterType::CONJUNCTION_OR ? " OR " : " AND ";
if (conjuction_child_filters.size()) {
query_filters += "(" + FilterJoin(conjuction_child_filters, std::move(conjuction_delimiter)) + ")";
}
return conjuction_child_filters.size();
}
case duckdb::TableFilterType::OPTIONAL_FILTER: {
auto optional_filter = reinterpret_cast<duckdb::OptionalFilter *>(filter);
return ExtractQueryFilters(optional_filter->child_filter.get(), column_name, query_filters, true);
}
/* DYNAMIC_FILTER is push down filter from topN execution. STRUCT_EXTRACT is
* only received if struct_extract function is used. Default will catch all
* filter that could be added in future in DuckDB.
*/
case duckdb::TableFilterType::DYNAMIC_FILTER:
case duckdb::TableFilterType::STRUCT_EXTRACT:
default: {
if (is_optional_filter_parent) {
pd_log(DEBUG1, "(DuckDB/ExtractQueryFilters) Unsupported optional filter: %s",
filter->ToString(column_name).c_str());
return 0;
}
throw duckdb::Exception(duckdb::ExceptionType::EXECUTOR,
"Invalid Filter Type: " + filter->ToString(column_name));
}
}
}

void
PostgresScanGlobalState::ConstructTableScanQuery(const duckdb::TableFunctionInitInput &input) {
/* SELECT COUNT(*) FROM */
Expand Down Expand Up @@ -82,27 +142,23 @@ PostgresScanGlobalState::ConstructTableScanQuery(const duckdb::TableFunctionInit

scan_query << " FROM " << GenerateQualifiedRelationName(rel);

first = true;

duckdb::vector<duckdb::string> query_filters;
for (auto const &[attr_num, duckdb_scanned_index] : columns_to_scan) {
auto filter = column_filters[duckdb_scanned_index];

if (!filter) {
continue;
}

if (first) {
scan_query << " WHERE ";
} else {
scan_query << " AND ";
}

first = false;
scan_query << "(";
duckdb::string column_query_filters;
auto attr = GetAttr(table_tuple_desc, attr_num - 1);
auto col = pgduckdb::QuoteIdentifier(GetAttName(attr));
scan_query << filter->ToString(col).c_str();
scan_query << ") ";
if (ExtractQueryFilters(filter, col, column_query_filters, false)) {
query_filters.emplace_back(column_query_filters);
};
}

if (query_filters.size()) {
scan_query << " WHERE ";
scan_query << FilterJoin(query_filters, " AND ");
}
}

Expand Down Expand Up @@ -157,12 +213,12 @@ PostgresScanTableFunction::PostgresScanTableFunction()
to_string = ToString;
}

std::string
PostgresScanTableFunction::ToString(const duckdb::FunctionData *data) {
auto &bind_data = data->Cast<PostgresScanFunctionData>();
std::ostringstream oss;
oss << "(POSTGRES_SCAN) " << GetRelationName(bind_data.rel);
return oss.str();
duckdb::InsertionOrderPreservingMap<duckdb::string>
PostgresScanTableFunction::ToString(duckdb::TableFunctionToStringInput &input) {
auto &bind_data = input.bind_data->Cast<PostgresScanFunctionData>();
duckdb::InsertionOrderPreservingMap<duckdb::string> result;
result["Table"] = GetRelationName(bind_data.rel);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe interesting to note that it is a PG table?

Suggested change
result["Table"] = GetRelationName(bind_data.rel);
result["Table"] = GetRelationName(bind_data.rel) + " (PG)";

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explain already show it is POSTGRES_SCAN node. We can add it if needed.

Copy link
Copy Markdown
Collaborator

@JelteF JelteF Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't show that for explain analyze for some reason I think.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return result;
}

duckdb::unique_ptr<duckdb::GlobalTableFunctionState>
Expand Down
6 changes: 3 additions & 3 deletions test/pycheck/explain_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_explain(cur: Cursor):
result = cur.sql("EXPLAIN SELECT count(*) FROM test_table where id = %s", (1,))
plan = "\n".join(result)
assert "UNGROUPED_AGGREGATE" in plan
assert "id=1 AND id IS NOT NULL" in plan
assert "id=1" in plan
assert "Total Time:" not in plan
assert "Output:" not in plan

Expand All @@ -37,7 +37,7 @@ def test_explain(cur: Cursor):
)
plan = "\n".join(result)
assert "UNGROUPED_AGGREGATE" in plan
assert "id=1 AND id IS NOT NULL" in plan
assert "id=1" in plan
assert "Total Time:" in plan
assert "Output:" not in plan

Expand Down Expand Up @@ -66,7 +66,7 @@ def test_explain_ctas(cur: Cursor):
"EXPLAIN ANALYZE CREATE TEMP TABLE heap2(id) AS SELECT * from heap1"
)
plan = "\n".join(result)
assert "POSTGRES_SCAN" in plan
assert "TABLE_SCAN" in plan
assert "Total Time:" in plan

result = cur.sql(
Expand Down
4 changes: 2 additions & 2 deletions test/regression/expected/duckdb_recycle.out
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ EXPLAIN SELECT count(*) FROM ta;
┌─────────────┴─────────────┐
│ POSTGRES_SCAN │
│ ──────────────────── │
(POSTGRES_SCAN) ta
Table: ta
│ │
│ ~2550 Rows │
└───────────────────────────┘
Expand All @@ -39,7 +39,7 @@ EXPLAIN SELECT count(*) FROM ta;
┌─────────────┴─────────────┐
│ POSTGRES_SCAN │
│ ──────────────────── │
(POSTGRES_SCAN) ta
Table: ta
│ │
│ ~2550 Rows │
└───────────────────────────┘
Expand Down
3 changes: 2 additions & 1 deletion test/regression/expected/execution_error.out
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ INSERT INTO int_as_varchar SELECT * from (
) t(a);
SELECT a::INTEGER FROM int_as_varchar;
ERROR: (PGDuckDB/Duckdb_ExecCustomScan) Conversion Error: Could not convert string 'abc' to INT32
LINE 1: SELECT (a)::integer AS a FROM pgduckdb.public.int...

LINE 1: SELECT (a)::integer AS a FROM pgduckdb.public.int_as_varchar
^
DROP TABLE int_as_varchar;
Loading