Skip to content

Commit de3735a

Browse files
mkaruza and JelteF authored
Duckdb 1.2 update (#548)
Co-authored-by: Jelte Fennema-Nio <github-tech@jeltef.nl>
1 parent 662c230 commit de3735a

File tree

21 files changed

+174
-99
lines changed

21 files changed

+174
-99
lines changed

.github/workflows/build_and_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
- name: Install clang-format and ruff
2323
run: python3 -m pip install -r dev_requirements.txt
2424
- name: Run clang-format
25-
run: git clang-format refs/remotes/origin/main --diff
25+
run: find src include -iname '*.hpp' -o -iname '*.h' -o -iname '*.cpp' -o -iname '*.c' | xargs git clang-format --diff origin/main
2626
- name: Run ruff check
2727
run: ruff check --output-format=github .
2828
- name: Run ruff format

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ OBJS += $(subst .c,.o, $(C_SRCS))
1313
# set to `make` to disable ninja
1414
DUCKDB_GEN ?= ninja
1515
# used to know what version of extensions to download
16-
DUCKDB_VERSION = v1.1.3
16+
DUCKDB_VERSION = v1.2.0
1717
# duckdb build tweaks
1818
DUCKDB_CMAKE_VARS = -DBUILD_SHELL=0 -DBUILD_PYTHON=0 -DBUILD_UNITTESTS=0
1919
# set to 1 to disable asserts in DuckDB. This is particularly useful in combination with MotherDuck.
@@ -119,7 +119,7 @@ lintcheck:
119119
ruff check
120120

121121
format:
122-
git clang-format origin/main
122+
find src include -iname '*.hpp' -o -iname '*.h' -o -iname '*.cpp' -o -iname '*.c' | xargs git clang-format origin/main
123123
ruff format
124124

125125
format-all:

include/pgduckdb/scan/postgres_scan.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ struct PostgresScanGlobalState : public duckdb::GlobalTableFunctionState {
2222
}
2323
void ConstructTableScanQuery(const duckdb::TableFunctionInitInput &input);
2424

25+
private:
26+
int ExtractQueryFilters(duckdb::TableFilter *filter, const char *column_name, duckdb::string &filters,
27+
bool is_optional_filter_parent);
28+
2529
public:
2630
Snapshot snapshot;
2731
Relation rel;
@@ -50,7 +54,6 @@ struct PostgresScanLocalState : public duckdb::LocalTableFunctionState {
5054
struct PostgresScanFunctionData : public duckdb::TableFunctionData {
5155
PostgresScanFunctionData(Relation rel, uint64_t cardinality, Snapshot snapshot);
5256
~PostgresScanFunctionData() override;
53-
5457
duckdb::vector<duckdb::string> complex_filters;
5558
Relation rel;
5659
uint64_t cardinality;
@@ -74,7 +77,7 @@ struct PostgresScanTableFunction : public duckdb::TableFunction {
7477

7578
static duckdb::unique_ptr<duckdb::NodeStatistics> PostgresScanCardinality(duckdb::ClientContext &context,
7679
const duckdb::FunctionData *data);
77-
static std::string ToString(const duckdb::FunctionData *bind_data);
80+
static duckdb::InsertionOrderPreservingMap<duckdb::string> ToString(duckdb::TableFunctionToStringInput &input);
7881
};
7982

8083
} // namespace pgduckdb

src/pgduckdb_types.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
797797
break;
798798
}
799799
case TIMESTAMPTZOID: {
800-
duckdb::timestamp_t timestamp = value.GetValue<duckdb::timestamp_t>();
800+
duckdb::timestamp_tz_t timestamp = value.GetValue<duckdb::timestamp_tz_t>();
801801
slot->tts_values[col] = timestamp.value - pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
802802
break;
803803
}
@@ -1236,7 +1236,7 @@ ConvertPostgresParameterToDuckValue(Datum value, Oid postgres_type) {
12361236
return duckdb::Value::TIMESTAMP(duckdb::timestamp_t(DatumGetTimestamp(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
12371237
case TIMESTAMPTZOID:
12381238
return duckdb::Value::TIMESTAMPTZ(
1239-
duckdb::timestamp_t(DatumGetTimestampTz(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
1239+
duckdb::timestamp_tz_t(DatumGetTimestampTz(value) + PGDUCKDB_DUCK_TIMESTAMP_OFFSET));
12401240
case INTERVALOID:
12411241
return duckdb::Value::INTERVAL(DatumGetInterval(value));
12421242
case FLOAT4OID:
@@ -1305,8 +1305,8 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i
13051305
result, duckdb::timestamp_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
13061306
break;
13071307
case duckdb::LogicalTypeId::TIMESTAMP_TZ:
1308-
Append<duckdb::timestamp_t>(
1309-
result, duckdb::timestamp_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
1308+
Append<duckdb::timestamp_tz_t>(
1309+
result, duckdb::timestamp_tz_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
13101310
break;
13111311
case duckdb::LogicalTypeId::INTERVAL:
13121312
Append<duckdb::interval_t>(result, DatumGetInterval(value), offset);

src/scan/postgres_scan.cpp

Lines changed: 77 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#include "duckdb/planner/filter/optional_filter.hpp"
2+
13
#include "pgduckdb/scan/postgres_scan.hpp"
24
#include "pgduckdb/scan/postgres_table_reader.hpp"
35
#include "pgduckdb/pgduckdb_types.hpp"
@@ -7,12 +9,71 @@
79
#include "pgduckdb/pgduckdb_process_lock.hpp"
810
#include "pgduckdb/logger.hpp"
911

12+
#include <numeric> // std::accumulate
13+
1014
namespace pgduckdb {
1115

1216
//
1317
// PostgresScanGlobalState
1418
//
1519

20+
/*
 * Concatenates the entries of `filters` into one string, inserting `delimiter`
 * between consecutive entries (no leading or trailing delimiter).
 *
 * Returns an empty string when `filters` is empty, so the function is safe to
 * call without a prior size check.
 */
static duckdb::string
FilterJoin(duckdb::vector<duckdb::string> &filters, duckdb::string &&delimiter) {
	if (filters.empty()) {
		return duckdb::string();
	}

	/* Append in place instead of building a fresh temporary per element. */
	duckdb::string joined = filters[0];
	for (auto it = filters.begin() + 1; it != filters.end(); ++it) {
		joined += delimiter;
		joined += *it;
	}
	return joined;
}
25+
26+
int
27+
PostgresScanGlobalState::ExtractQueryFilters(duckdb::TableFilter *filter, const char *column_name,
28+
duckdb::string &query_filters, bool is_inside_optional_filter) {
29+
switch (filter->filter_type) {
30+
case duckdb::TableFilterType::CONSTANT_COMPARISON:
31+
case duckdb::TableFilterType::IS_NULL:
32+
case duckdb::TableFilterType::IS_NOT_NULL:
33+
case duckdb::TableFilterType::IN_FILTER: {
34+
query_filters += filter->ToString(column_name).c_str();
35+
return 1;
36+
}
37+
case duckdb::TableFilterType::CONJUNCTION_OR:
38+
case duckdb::TableFilterType::CONJUNCTION_AND: {
39+
auto conjuction_filter = reinterpret_cast<duckdb::ConjunctionFilter *>(filter);
40+
duckdb::vector<std::string> conjuction_child_filters;
41+
for (idx_t i = 0; i < conjuction_filter->child_filters.size(); i++) {
42+
std::string child_filter;
43+
if (ExtractQueryFilters(conjuction_filter->child_filters[i].get(), column_name, child_filter,
44+
is_inside_optional_filter)) {
45+
conjuction_child_filters.emplace_back(child_filter);
46+
}
47+
}
48+
duckdb::string conjuction_delimiter =
49+
filter->filter_type == duckdb::TableFilterType::CONJUNCTION_OR ? " OR " : " AND ";
50+
if (conjuction_child_filters.size()) {
51+
query_filters += "(" + FilterJoin(conjuction_child_filters, std::move(conjuction_delimiter)) + ")";
52+
}
53+
return conjuction_child_filters.size();
54+
}
55+
case duckdb::TableFilterType::OPTIONAL_FILTER: {
56+
auto optional_filter = reinterpret_cast<duckdb::OptionalFilter *>(filter);
57+
return ExtractQueryFilters(optional_filter->child_filter.get(), column_name, query_filters, true);
58+
}
59+
/* DYNAMIC_FILTER is push down filter from topN execution. STRUCT_EXTRACT is
60+
* only received if struct_extract function is used. Default will catch all
61+
* filter that could be added in future in DuckDB.
62+
*/
63+
case duckdb::TableFilterType::DYNAMIC_FILTER:
64+
case duckdb::TableFilterType::STRUCT_EXTRACT:
65+
default: {
66+
if (is_inside_optional_filter) {
67+
pd_log(DEBUG1, "(DuckDB/ExtractQueryFilters) Unsupported optional filter: %s",
68+
filter->ToString(column_name).c_str());
69+
return 0;
70+
}
71+
throw duckdb::Exception(duckdb::ExceptionType::EXECUTOR,
72+
"Invalid Filter Type: " + filter->ToString(column_name));
73+
}
74+
}
75+
}
76+
1677
void
1778
PostgresScanGlobalState::ConstructTableScanQuery(const duckdb::TableFunctionInitInput &input) {
1879
/* SELECT COUNT(*) FROM */
@@ -82,27 +143,23 @@ PostgresScanGlobalState::ConstructTableScanQuery(const duckdb::TableFunctionInit
82143

83144
scan_query << " FROM " << GenerateQualifiedRelationName(rel);
84145

85-
first = true;
86-
146+
duckdb::vector<duckdb::string> query_filters;
87147
for (auto const &[attr_num, duckdb_scanned_index] : columns_to_scan) {
88148
auto filter = column_filters[duckdb_scanned_index];
89-
90149
if (!filter) {
91150
continue;
92151
}
93-
94-
if (first) {
95-
scan_query << " WHERE ";
96-
} else {
97-
scan_query << " AND ";
98-
}
99-
100-
first = false;
101-
scan_query << "(";
152+
duckdb::string column_query_filters;
102153
auto attr = GetAttr(table_tuple_desc, attr_num - 1);
103154
auto col = pgduckdb::QuoteIdentifier(GetAttName(attr));
104-
scan_query << filter->ToString(col).c_str();
105-
scan_query << ") ";
155+
if (ExtractQueryFilters(filter, col, column_query_filters, false)) {
156+
query_filters.emplace_back(column_query_filters);
157+
};
158+
}
159+
160+
if (query_filters.size()) {
161+
scan_query << " WHERE ";
162+
scan_query << FilterJoin(query_filters, " AND ");
106163
}
107164
}
108165

@@ -157,12 +214,12 @@ PostgresScanTableFunction::PostgresScanTableFunction()
157214
to_string = ToString;
158215
}
159216

160-
std::string
161-
PostgresScanTableFunction::ToString(const duckdb::FunctionData *data) {
162-
auto &bind_data = data->Cast<PostgresScanFunctionData>();
163-
std::ostringstream oss;
164-
oss << "(POSTGRES_SCAN) " << GetRelationName(bind_data.rel);
165-
return oss.str();
217+
/*
 * Builds the key/value entries reported for this table function: a single
 * "Table" entry holding the name of the relation stored in the bind data.
 */
duckdb::InsertionOrderPreservingMap<duckdb::string>
PostgresScanTableFunction::ToString(duckdb::TableFunctionToStringInput &input) {
	duckdb::InsertionOrderPreservingMap<duckdb::string> entries;
	auto &scan_data = input.bind_data->Cast<PostgresScanFunctionData>();
	entries["Table"] = GetRelationName(scan_data.rel);
	return entries;
}
167224

168225
duckdb::unique_ptr<duckdb::GlobalTableFunctionState>

test/pycheck/explain_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def test_explain(cur: Cursor):
2828
result = cur.sql("EXPLAIN SELECT count(*) FROM test_table where id = %s", (1,))
2929
plan = "\n".join(result)
3030
assert "UNGROUPED_AGGREGATE" in plan
31-
assert "id=1 AND id IS NOT NULL" in plan
31+
assert "id=1" in plan
3232
assert "Total Time:" not in plan
3333
assert "Output:" not in plan
3434

@@ -37,7 +37,7 @@ def test_explain(cur: Cursor):
3737
)
3838
plan = "\n".join(result)
3939
assert "UNGROUPED_AGGREGATE" in plan
40-
assert "id=1 AND id IS NOT NULL" in plan
40+
assert "id=1" in plan
4141
assert "Total Time:" in plan
4242
assert "Output:" not in plan
4343

@@ -66,7 +66,7 @@ def test_explain_ctas(cur: Cursor):
6666
"EXPLAIN ANALYZE CREATE TEMP TABLE heap2(id) AS SELECT * from heap1"
6767
)
6868
plan = "\n".join(result)
69-
assert "POSTGRES_SCAN" in plan
69+
assert "TABLE_SCAN" in plan
7070
assert "Total Time:" in plan
7171

7272
result = cur.sql(

test/pycheck/non_superuser_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,26 @@ def test_community_extensions(pg: Postgres):
1818
match="Permission Error: File system LocalFileSystem has been disabled by configuration",
1919
):
2020
cur.sql(
21-
"SELECT * FROM duckdb.raw_query($$ INSTALL avro FROM community; $$)"
21+
"SELECT * FROM duckdb.raw_query($$ INSTALL duckpgq FROM community; $$)"
2222
)
2323

2424
# Even if such community extensions somehow get installed, it's not possible
2525
# to load them without changing allow_community_extensions. Not even for a
2626
# superuser.
2727
with pg.cur() as cur:
2828
cur.sql("SET duckdb.force_execution = false")
29-
cur.sql("SELECT * FROM duckdb.raw_query($$ INSTALL avro FROM community; $$)")
29+
cur.sql("SELECT * FROM duckdb.raw_query($$ INSTALL duckpgq FROM community; $$)")
3030
with pytest.raises(
3131
Exception,
3232
match="IO Error: Extension .* could not be loaded because its signature is either missing or invalid and unsigned extensions are disabled by configuration",
3333
):
34-
cur.sql("SELECT * FROM duckdb.raw_query($$ LOAD avro; $$)")
34+
cur.sql("SELECT * FROM duckdb.raw_query($$ LOAD duckpgq; $$)")
3535

3636
# But it should be possible to load them after changing that setting.
3737
with pg.cur() as cur:
3838
cur.sql("SET duckdb.allow_community_extensions = true")
3939
cur.sql("SET duckdb.force_execution = false")
40-
cur.sql("SELECT * FROM duckdb.raw_query($$ LOAD avro; $$)")
40+
cur.sql("SELECT * FROM duckdb.raw_query($$ LOAD duckpgq; $$)")
4141

4242
# And that setting is only changeable by superusers
4343
with pg.cur() as cur:
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"writerFeatures":[]}}
2-
{"metaData":{"id":"bec316ba-98b6-4d9c-8ad1-a41b4a786d9b","name":"delta_table","description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1730972838217,"configuration":{"delta.minReaderVersion":"3","delta.minWriterVersion":"7"}}}
1+
{"protocol":{"minReaderVersion":1,"minWriterVersion":7,"writerFeatures":[]}}
2+
{"metaData":{"id":"bec316ba-98b6-4d9c-8ad1-a41b4a786d9b","name":"delta_table","description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1730972838217,"configuration":{"delta.minReaderVersion":"1","delta.minWriterVersion":"7"}}}
33
{"commitInfo":{"timestamp":1730972838217,"operation":"CREATE TABLE","operationParameters":{"metadata":"{\"configuration\":{\"delta.minReaderVersion\":\"3\",\"delta.minWriterVersion\":\"7\"},\"createdTime\":1730972838217,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"bec316ba-98b6-4d9c-8ad1-a41b4a786d9b\",\"name\":\"delta_table\",\"partitionColumns\":[],\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"a\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"b\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\"}","mode":"ErrorIfExists","protocol":"{\"minReaderVersion\":3,\"minWriterVersion\":7,\"writerFeatures\":[]}"},"creator":"pg_duckdb","clientVersion":"delta-rs.0.21.0"}}

test/regression/expected/duckdb_recycle.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ EXPLAIN SELECT count(*) FROM ta;
1515
┌─────────────┴─────────────┐
1616
│ POSTGRES_SCAN │
1717
│ ──────────────────── │
18-
(POSTGRES_SCAN) ta
18+
Table: ta
1919
│ │
2020
│ ~2550 Rows │
2121
└───────────────────────────┘
@@ -39,7 +39,7 @@ EXPLAIN SELECT count(*) FROM ta;
3939
┌─────────────┴─────────────┐
4040
│ POSTGRES_SCAN │
4141
│ ──────────────────── │
42-
(POSTGRES_SCAN) ta
42+
Table: ta
4343
│ │
4444
│ ~2550 Rows │
4545
└───────────────────────────┘

test/regression/expected/execution_error.out

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ INSERT INTO int_as_varchar SELECT * from (
55
) t(a);
66
SELECT a::INTEGER FROM int_as_varchar;
77
ERROR: (PGDuckDB/Duckdb_ExecCustomScan) Conversion Error: Could not convert string 'abc' to INT32
8-
LINE 1: SELECT (a)::integer AS a FROM pgduckdb.public.int...
8+
9+
LINE 1: SELECT (a)::integer AS a FROM pgduckdb.public.int_as_varchar
910
^
1011
DROP TABLE int_as_varchar;

0 commit comments

Comments
 (0)