Skip to content

Commit 475130a

Browse files
author
Evert
committed
Three PRs only on main between v1.3.0 and v1.3.1:
- Add qualified parameter to Python GetTableNames API #17797
- Use an arena linked list for the physical operator children #17748
- Pass ExtensionLoader when loading extensions, change extension entry function #17772
1 parent 057c560 commit 475130a

File tree

10 files changed

+154
-19
lines changed

10 files changed

+154
-19
lines changed

README.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,29 @@ This codebase is developed with the following tools:
158158
159159
### Merging changes to pythonpkg from duckdb main
160160
161-
Check the git log for the last changes to the pythonpkg since the last ref you have
161+
1. Checkout main
162+
2. Identify the merge commits that brought in tags to main:
163+
```bash
164+
git log --graph --oneline --decorate main --simplify-by-decoration
165+
```
166+
167+
3. Get the log of commits
168+
```bash
169+
git log --oneline 71c5c07cdd..c9254ecff2 -- tools/pythonpkg/
170+
```
162171
172+
4. Checkout v1.3-ossivalis
173+
5. Get the log of commits
163174
```bash
164-
git log <hash>..HEAD -- tools/pythonpkg/
175+
git log --oneline v1.3.0..v1.3.1 -- tools/pythonpkg/
165176
```
177+
git diff --name-status 71c5c07cdd c9254ecff2 -- tools/pythonpkg/
178+
179+
```bash
180+
git log --oneline 71c5c07cdd..c9254ecff2 -- tools/pythonpkg/
181+
git diff --name-status <HASH_A> <HASH_B> -- tools/pythonpkg/
182+
```
183+
166184
167185
## Versioning and Releases
168186

duckdb/__init__.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ class DuckDBPyConnection:
346346
def from_arrow(self, arrow_object: object) -> DuckDBPyRelation: ...
347347
def from_parquet(self, file_glob: str, binary_as_string: bool = False, *, file_row_number: bool = False, filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, compression: Optional[str] = None) -> DuckDBPyRelation: ...
348348
def read_parquet(self, file_glob: str, binary_as_string: bool = False, *, file_row_number: bool = False, filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, compression: Optional[str] = None) -> DuckDBPyRelation: ...
349-
def get_table_names(self, query: str) -> Set[str]: ...
349+
def get_table_names(self, query: str, *, qualified: bool = False) -> Set[str]: ...
350350
def install_extension(self, extension: str, *, force_install: bool = False, repository: Optional[str] = None, repository_url: Optional[str] = None, version: Optional[str] = None) -> None: ...
351351
def load_extension(self, extension: str) -> None: ...
352352
# END OF CONNECTION METHODS
@@ -694,7 +694,7 @@ def from_df(df: pandas.DataFrame, *, connection: DuckDBPyConnection = ...) -> Du
694694
def from_arrow(arrow_object: object, *, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ...
695695
def from_parquet(file_glob: str, binary_as_string: bool = False, *, file_row_number: bool = False, filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, compression: Optional[str] = None, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ...
696696
def read_parquet(file_glob: str, binary_as_string: bool = False, *, file_row_number: bool = False, filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, compression: Optional[str] = None, connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ...
697-
def get_table_names(query: str, *, connection: DuckDBPyConnection = ...) -> Set[str]: ...
697+
def get_table_names(query: str, *, qualified: bool = False, connection: DuckDBPyConnection = ...) -> Set[str]: ...
698698
def install_extension(extension: str, *, force_install: bool = False, repository: Optional[str] = None, repository_url: Optional[str] = None, version: Optional[str] = None, connection: DuckDBPyConnection = ...) -> None: ...
699699
def load_extension(extension: str, *, connection: DuckDBPyConnection = ...) -> None: ...
700700
def project(df: pandas.DataFrame, *args: str, groups: str = "", connection: DuckDBPyConnection = ...) -> DuckDBPyRelation: ...

external/duckdb

Submodule duckdb updated 880 files

scripts/connection_methods.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,13 @@
10311031
"type": "str"
10321032
}
10331033
],
1034+
"kwargs": [
1035+
{
1036+
"name": "qualified",
1037+
"default": "False",
1038+
"type": "bool"
1039+
}
1040+
],
10341041
"return": "Set[str]"
10351042
},
10361043
{

src/duckdb_py/duckdb_python.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -765,13 +765,13 @@ static void InitializeConnectionMethods(py::module_ &m) {
765765
py::arg("compression") = py::none(), py::arg("connection") = py::none());
766766
m.def(
767767
"get_table_names",
768-
[](const string &query, shared_ptr<DuckDBPyConnection> conn = nullptr) {
768+
[](const string &query, bool qualified, shared_ptr<DuckDBPyConnection> conn = nullptr) {
769769
if (!conn) {
770770
conn = DuckDBPyConnection::DefaultConnection();
771771
}
772-
return conn->GetTableNames(query);
772+
return conn->GetTableNames(query, qualified);
773773
},
774-
"Extract the required table names from a query", py::arg("query"), py::kw_only(),
774+
"Extract the required table names from a query", py::arg("query"), py::kw_only(), py::arg("qualified") = false,
775775
py::arg("connection") = py::none());
776776
m.def(
777777
"install_extension",

src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
280280

281281
unique_ptr<DuckDBPyRelation> FromArrow(py::object &arrow_object);
282282

283-
unordered_set<string> GetTableNames(const string &query);
283+
unordered_set<string> GetTableNames(const string &query, bool qualified);
284284

285285
shared_ptr<DuckDBPyConnection> UnregisterPythonObject(const string &name);
286286

src/duckdb_py/pyconnection.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@
5757
#include "duckdb/main/stream_query_result.hpp"
5858
#include "duckdb/main/relation/materialized_relation.hpp"
5959
#include "duckdb/main/relation/query_relation.hpp"
60-
#include "duckdb/main/extension_util.hpp"
6160
#include "duckdb/parser/statement/load_statement.hpp"
6261
#include "duckdb_python/expression/pyexpression.hpp"
6362

@@ -280,7 +279,7 @@ static void InitializeConnectionMethods(py::class_<DuckDBPyConnection, shared_pt
280279
py::arg("filename") = false, py::arg("hive_partitioning") = false, py::arg("union_by_name") = false,
281280
py::arg("compression") = py::none());
282281
m.def("get_table_names", &DuckDBPyConnection::GetTableNames, "Extract the required table names from a query",
283-
py::arg("query"));
282+
py::arg("query"), py::kw_only(), py::arg("qualified") = false);
284283
m.def("install_extension", &DuckDBPyConnection::InstallExtension,
285284
"Install an extension by name, with an optional version and/or repository to get the extension from",
286285
py::arg("extension"), py::kw_only(), py::arg("force_install") = false, py::arg("repository") = py::none(),
@@ -1761,9 +1760,9 @@ unique_ptr<DuckDBPyRelation> DuckDBPyConnection::FromArrow(py::object &arrow_obj
17611760
return make_uniq<DuckDBPyRelation>(std::move(rel));
17621761
}
17631762

1764-
unordered_set<string> DuckDBPyConnection::GetTableNames(const string &query) {
1763+
unordered_set<string> DuckDBPyConnection::GetTableNames(const string &query, bool qualified) {
17651764
auto &connection = con.GetConnection();
1766-
return connection.GetTableNames(query);
1765+
return connection.GetTableNames(query, qualified);
17671766
}
17681767

17691768
shared_ptr<DuckDBPyConnection> DuckDBPyConnection::UnregisterPythonObject(const string &name) {
@@ -2085,8 +2084,22 @@ void InstantiateNewInstance(DuckDB &db) {
20852084
auto &db_instance = *db.instance;
20862085
PandasScanFunction scan_fun;
20872086
MapFunction map_fun;
2088-
ExtensionUtil::RegisterFunction(db_instance, scan_fun);
2089-
ExtensionUtil::RegisterFunction(db_instance, map_fun);
2087+
2088+
TableFunctionSet map_set(map_fun.name);
2089+
map_set.AddFunction(std::move(map_fun));
2090+
CreateTableFunctionInfo map_info(std::move(map_set));
2091+
map_info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT;
2092+
2093+
TableFunctionSet scan_set(scan_fun.name);
2094+
scan_set.AddFunction(std::move(scan_fun));
2095+
CreateTableFunctionInfo scan_info(std::move(scan_set));
2096+
scan_info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT;
2097+
2098+
auto &system_catalog = Catalog::GetSystemCatalog(db_instance);
2099+
auto transaction = CatalogTransaction::GetSystemTransaction(db_instance);
2100+
2101+
system_catalog.CreateFunction(transaction, map_info);
2102+
system_catalog.CreateFunction(transaction, scan_info);
20902103
}
20912104

20922105
static shared_ptr<DuckDBPyConnection> FetchOrCreateInstance(const string &database_path, DBConfig &config) {

src/duckdb_py/pyrelation.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -940,11 +940,12 @@ duckdb::pyarrow::Table DuckDBPyRelation::ToArrowTableInternal(idx_t batch_size,
940940
ScopedConfigSetting scoped_setting(
941941
config,
942942
[&batch_size](ClientConfig &config) {
943-
config.result_collector = [&batch_size](ClientContext &context, PreparedStatementData &data) {
943+
config.get_result_collector = [&batch_size](ClientContext &context,
944+
PreparedStatementData &data) -> PhysicalOperator & {
944945
return PhysicalArrowCollector::Create(context, data, batch_size);
945946
};
946947
},
947-
[](ClientConfig &config) { config.result_collector = nullptr; });
948+
[](ClientConfig &config) { config.get_result_collector = nullptr; });
948949
ExecuteOrThrow();
949950
}
950951
AssertResultOpen();

tests/fast/test_get_table_names.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,99 @@ def test_table_fail(self, duckdb_cursor):
1313
conn.close()
1414
with pytest.raises(duckdb.ConnectionException, match="Connection already closed"):
1515
table_names = conn.get_table_names("SELECT * FROM my_table1, my_table2, my_table3")
16+
17+
def test_qualified_parameter_basic(self):
18+
conn = duckdb.connect()
19+
20+
# Default (qualified=False)
21+
table_names = conn.get_table_names("SELECT * FROM test_table")
22+
assert table_names == {'test_table'}
23+
24+
# Explicit qualified=False
25+
table_names = conn.get_table_names("SELECT * FROM test_table", qualified=False)
26+
assert table_names == {'test_table'}
27+
28+
def test_qualified_parameter_schemas(self):
29+
conn = duckdb.connect()
30+
31+
# Default (qualified=False)
32+
query = "SELECT * FROM test_schema.schema_table, main_table"
33+
table_names = conn.get_table_names(query)
34+
assert table_names == {'schema_table', 'main_table'}
35+
36+
# Test with qualified names
37+
table_names = conn.get_table_names(query, qualified=True)
38+
assert table_names == {'test_schema.schema_table', 'main_table'}
39+
40+
def test_qualified_parameter_catalogs(self):
41+
conn = duckdb.connect()
42+
43+
# Test with qualified names including catalogs
44+
query = "SELECT * FROM catalog1.test_schema.catalog_table, regular_table"
45+
46+
# Default (qualified=False)
47+
table_names = conn.get_table_names(query)
48+
assert table_names == {'catalog_table', 'regular_table'}
49+
50+
# With qualified=True
51+
table_names = conn.get_table_names(query, qualified=True)
52+
assert table_names == {'catalog1.test_schema.catalog_table', 'regular_table'}
53+
54+
def test_qualified_parameter_quoted_identifiers(self):
55+
conn = duckdb.connect()
56+
57+
# Test with quoted identifiers
58+
query = 'SELECT * FROM "Schema.With.Dots"."Table.With.Dots", "Table With Spaces"'
59+
60+
# Default (qualified=False)
61+
table_names = conn.get_table_names(query)
62+
assert table_names == {'Table.With.Dots', 'Table With Spaces'}
63+
64+
# With qualified=True
65+
table_names = conn.get_table_names(query, qualified=True)
66+
assert table_names == {'"Schema.With.Dots"."Table.With.Dots"', '"Table With Spaces"'}
67+
68+
def test_expanded_views(self):
69+
conn = duckdb.connect()
70+
conn.execute('CREATE TABLE my_table(i INT)')
71+
conn.execute('CREATE VIEW v1 AS SELECT * FROM my_table')
72+
73+
# Test that v1 expands to my_table
74+
query = 'SELECT col_a FROM v1'
75+
76+
# Default (qualified=False)
77+
table_names = conn.get_table_names(query)
78+
assert table_names == {'my_table'}
79+
80+
# With qualified=True
81+
table_names = conn.get_table_names(query, qualified=True)
82+
assert table_names == {'my_table'}
83+
84+
def test_expanded_views_with_schema(self):
85+
conn = duckdb.connect()
86+
conn.execute('CREATE SCHEMA my_schema')
87+
conn.execute('CREATE TABLE my_schema.my_table(i INT)')
88+
conn.execute('CREATE VIEW v1 AS SELECT * FROM my_schema.my_table')
89+
90+
# Test that v1 expands to my_table
91+
query = 'SELECT col_a FROM v1'
92+
93+
# Default (qualified=False)
94+
table_names = conn.get_table_names(query)
95+
assert table_names == {'my_table'}
96+
97+
# With qualified=True
98+
table_names = conn.get_table_names(query, qualified=True)
99+
assert table_names == {'my_schema.my_table'}
100+
101+
def test_select_function(self):
102+
conn = duckdb.connect()
103+
query = 'SELECT EXTRACT(second FROM i) FROM timestamps;'
104+
105+
# Default (qualified=False)
106+
table_names = conn.get_table_names(query)
107+
assert table_names == {'timestamps'}
108+
109+
# With qualified=True
110+
table_names = conn.get_table_names(query, qualified=True)
111+
assert table_names == {'timestamps'}

tests/fast/test_replacement_scan.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ def test_same_name_cte(self, duckdb_cursor):
318318
"""
319319
rel = create_relation(duckdb_cursor, query)
320320
res = rel.fetchall()
321-
assert res == [(1,), (2,), (3,)]
321+
assert res == [(2,), (3,), (4,)]
322322

323323
query = """
324324
WITH RECURSIVE df AS (
@@ -328,7 +328,7 @@ def test_same_name_cte(self, duckdb_cursor):
328328
"""
329329
rel = create_relation(duckdb_cursor, query)
330330
res = rel.fetchall()
331-
assert res == [(1,), (2,), (3,)]
331+
assert res == [(2,), (3,), (4,)]
332332

333333
def test_use_with_view(self, duckdb_cursor):
334334
rel = create_relation(duckdb_cursor, "select * from df")

0 commit comments

Comments
 (0)