Skip to content

Commit 27b1d79

Browse files
Fix adbc driver (duckdb#81)
Fixes duckdb#74 This PR: - exports the `adbc_driver_duckdb` package so you can use it after `pip install duckdb` - doesn't just skip the tests if the package can't be found - limits the exported symbols to the only two that are ever needed: `duckdb_adbc_init` and `PyInit__duckdb`: ❯ nm -g -C -P .venv/lib/python3.11/site-packages/_duckdb.cpython-311-darwin.so | ag -v ' U ' _PyInit__duckdb T 41900 0 _duckdb_adbc_init T 1054e6c 0
2 parents dc5e746 + 6d5a8c1 commit 27b1d79

File tree

7 files changed

+119
-57
lines changed

7 files changed

+119
-57
lines changed

CMakeLists.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,37 @@ pybind11_add_module(
7979
# add _duckdb_dependencies
8080
target_link_libraries(_duckdb PRIVATE _duckdb_dependencies)
8181

82+
# ────────────────────────────────────────────
83+
# Controlling symbol export
84+
#
85+
# We want to export exactly two symbols: - PyInit__duckdb: this allows CPython
86+
# to load the module - duckdb_adbc_init: the DuckDB ADBC driver
87+
#
88+
# The export of symbols on OSX and Linux is controlled by: - Visibility
89+
# annotations in the code (for this lib we use the PYBIND11_EXPORT macro) -
90+
# Telling the linker which symbols we want exported, which we do below
91+
#
92+
# For Windows, we rely on just the visbility annotations.
93+
# ────────────────────────────────────────────
94+
set_target_properties(
95+
_duckdb
96+
PROPERTIES CXX_VISIBILITY_PRESET hidden
97+
C_VISIBILITY_PRESET hidden
98+
VISIBILITY_INLINES_HIDDEN ON)
99+
100+
if(APPLE)
101+
target_link_options(
102+
_duckdb PRIVATE "LINKER:-exported_symbol,_duckdb_adbc_init"
103+
"LINKER:-exported_symbol,_PyInit__duckdb")
104+
elseif(UNIX AND NOT APPLE)
105+
target_link_options(
106+
_duckdb PRIVATE "LINKER:--export-dynamic-symbol=duckdb_adbc_init"
107+
"LINKER:--export-dynamic-symbol=PyInit__duckdb")
108+
elseif(WIN32)
109+
target_link_options(_duckdb PRIVATE "/EXPORT:duckdb_adbc_init"
110+
"/EXPORT:PyInit__duckdb")
111+
endif()
112+
82113
# ────────────────────────────────────────────
83114
# Put the object file in the correct place
84115
# ────────────────────────────────────────────

adbc_driver_duckdb/__init__.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@
1919

2020
import enum
2121
import functools
22+
import importlib
2223
import typing
2324

2425
import adbc_driver_manager
2526

26-
__all__ = ["StatementOptions", "connect"]
27-
2827

2928
class StatementOptions(enum.Enum):
3029
"""Statement options specific to the DuckDB driver."""
@@ -36,12 +35,16 @@ class StatementOptions(enum.Enum):
3635
def connect(path: typing.Optional[str] = None) -> adbc_driver_manager.AdbcDatabase:
3736
"""Create a low level ADBC connection to DuckDB."""
3837
if path is None:
39-
return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), entrypoint="duckdb_adbc_init")
40-
return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), entrypoint="duckdb_adbc_init", path=path)
38+
return adbc_driver_manager.AdbcDatabase(driver=driver_path(), entrypoint="duckdb_adbc_init")
39+
return adbc_driver_manager.AdbcDatabase(driver=driver_path(), entrypoint="duckdb_adbc_init", path=path)
4140

4241

4342
@functools.cache
44-
def _driver_path() -> str:
45-
import duckdb
46-
47-
return duckdb.duckdb.__file__
43+
def driver_path() -> str:
44+
"""Get the path to the DuckDB ADBC driver."""
45+
duckdb_module_spec = importlib.util.find_spec("_duckdb")
46+
if duckdb_module_spec is None:
47+
msg = "Could not find duckdb shared library. Did you pip install duckdb?"
48+
raise ImportError(msg)
49+
print(f"Found duckdb shared library at {duckdb_module_spec.origin}")
50+
return duckdb_module_spec.origin

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ all = [ # users can install duckdb with 'duckdb[all]', which will install this l
4848
"numpy", # used in duckdb.experimental.spark and in duckdb.fetchnumpy()
4949
"pandas", # used for pandas dataframes all over the place
5050
"pyarrow", # used for pyarrow support
51-
"adbc_driver_manager", # for the adbc driver (TODO: this should live under the duckdb package)
51+
"adbc-driver-manager", # for the adbc driver
5252
]
5353

5454
######################################################################################################
@@ -77,6 +77,7 @@ metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
7777
[tool.scikit-build.wheel]
7878
cmake = true
7979
packages.duckdb = "duckdb"
80+
packages.adbc_driver_duckdb = "adbc_driver_duckdb"
8081

8182
[tool.scikit-build.cmake.define]
8283
CORE_EXTENSIONS = "core_functions;json;parquet;icu;jemalloc"
@@ -224,6 +225,7 @@ stubdeps = [ # dependencies used for typehints in the stubs
224225
"pyarrow",
225226
]
226227
test = [ # dependencies used for running tests
228+
"adbc-driver-manager",
227229
"pytest",
228230
"pytest-reraise",
229231
"pytest-timeout",

src/duckdb_py/duckdb_python.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp"
2121
#include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp"
2222
#include "duckdb/common/enums/statement_type.hpp"
23+
#include "duckdb/common/adbc/adbc-init.hpp"
2324

2425
#include "duckdb.hpp"
2526

@@ -1007,7 +1008,35 @@ static void RegisterExpectedResultType(py::handle &m) {
10071008
expected_return_type.export_values();
10081009
}
10091010

1011+
// ######################################################################
1012+
// Symbol exports
1013+
//
1014+
// We want to limit the symbols we export to only the absolute minimum.
1015+
// This means we compile with -fvisibility=hidden to hide all symbols,
1016+
// and then explicitly export only the symbols we want.
1017+
//
1018+
// Right now we export two symbols only:
1019+
// - duckdb_adbc_init: the entrypoint for our ADBC driver
1020+
// - PyInit__duckdb: the entrypoint for the python extension
1021+
//
1022+
// All symbols that need exporting must be added to both the list below
1023+
// AND to CMakeLists.txt.
1024+
extern "C" {
1025+
PYBIND11_EXPORT void *_force_symbol_inclusion() {
1026+
static void *symbols[] = {
1027+
// Add functions to export here
1028+
(void *)&duckdb_adbc_init,
1029+
};
1030+
return symbols;
1031+
}
1032+
};
1033+
10101034
PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
1035+
// DO NOT REMOVE: the below forces that we include all symbols we want to export
1036+
volatile auto *keep_alive = _force_symbol_inclusion();
1037+
(void)keep_alive;
1038+
// END
1039+
10111040
py::enum_<duckdb::ExplainType>(m, "ExplainType")
10121041
.value("STANDARD", duckdb::ExplainType::EXPLAIN_STANDARD)
10131042
.value("ANALYZE", duckdb::ExplainType::EXPLAIN_ANALYZE)

tests/fast/adbc/test_adbc.py

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,21 @@
11
import datetime
2+
import sys
23
from pathlib import Path
34

5+
import adbc_driver_manager.dbapi
46
import numpy as np
7+
import pyarrow
58
import pytest
69

7-
import duckdb
10+
import adbc_driver_duckdb.dbapi
811

9-
adbc_driver_manager = pytest.importorskip("adbc_driver_manager.dbapi")
10-
adbc_driver_manager_lib = pytest.importorskip("adbc_driver_manager._lib")
11-
12-
pyarrow = pytest.importorskip("pyarrow")
13-
14-
# When testing local, if you build via BUILD_PYTHON=1 make, you need to manually set up the
15-
# dylib duckdb path.
16-
driver_path = duckdb.duckdb.__file__
12+
xfail = pytest.mark.xfail
13+
driver_path = adbc_driver_duckdb.driver_path()
1714

1815

1916
@pytest.fixture
2017
def duck_conn():
21-
with adbc_driver_manager.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn:
18+
with adbc_driver_manager.dbapi.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn:
2219
yield conn
2320

2421

@@ -32,7 +29,7 @@ def example_table():
3229
)
3330

3431

35-
@pytest.mark.xfail
32+
@xfail(sys.platform == "win32", reason="adbc-driver-manager.adbc_get_info() returns an empty dict on windows")
3633
def test_connection_get_info(duck_conn):
3734
assert duck_conn.adbc_get_info() != {}
3835

@@ -45,6 +42,9 @@ def test_connection_get_table_types(duck_conn):
4542
assert duck_conn.adbc_get_table_types() == ["BASE TABLE"]
4643

4744

45+
@xfail(
46+
sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows"
47+
)
4848
def test_connection_get_objects(duck_conn):
4949
with duck_conn.cursor() as cursor:
5050
cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)")
@@ -66,6 +66,9 @@ def test_connection_get_objects(duck_conn):
6666
assert depth_all.schema == depth_catalogs.schema
6767

6868

69+
@xfail(
70+
sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows"
71+
)
6972
def test_connection_get_objects_filters(duck_conn):
7073
with duck_conn.cursor() as cursor:
7174
cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)")
@@ -98,7 +101,7 @@ def test_commit(tmp_path):
98101
table = example_table()
99102
db_kwargs = {"path": f"{db}"}
100103
# Start connection with auto-commit off
101-
with adbc_driver_manager.connect(
104+
with adbc_driver_manager.dbapi.connect(
102105
driver=driver_path,
103106
entrypoint="duckdb_adbc_init",
104107
db_kwargs=db_kwargs,
@@ -108,7 +111,7 @@ def test_commit(tmp_path):
108111
cur.adbc_ingest("ingest", table, "create")
109112

110113
# Check Data is not there
111-
with adbc_driver_manager.connect(
114+
with adbc_driver_manager.dbapi.connect(
112115
driver=driver_path,
113116
entrypoint="duckdb_adbc_init",
114117
db_kwargs=db_kwargs,
@@ -118,7 +121,7 @@ def test_commit(tmp_path):
118121
with conn.cursor() as cur:
119122
# This errors because the table does not exist
120123
with pytest.raises(
121-
adbc_driver_manager_lib.InternalError,
124+
adbc_driver_manager._lib.InternalError,
122125
match=r"Table with name ingest does not exist!",
123126
):
124127
cur.execute("SELECT count(*) from ingest")
@@ -127,7 +130,7 @@ def test_commit(tmp_path):
127130

128131
# This now works because we enabled autocommit
129132
with (
130-
adbc_driver_manager.connect(
133+
adbc_driver_manager.dbapi.connect(
131134
driver=driver_path,
132135
entrypoint="duckdb_adbc_init",
133136
db_kwargs=db_kwargs,
@@ -204,6 +207,7 @@ def test_statement_query(duck_conn):
204207
assert cursor.fetch_arrow_table().to_pylist() == [{"foo": 1}]
205208

206209

210+
@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows")
207211
def test_insertion(duck_conn):
208212
table = example_table()
209213
reader = table.to_reader()
@@ -221,7 +225,7 @@ def test_insertion(duck_conn):
221225
# Test Append
222226
with duck_conn.cursor() as cursor:
223227
with pytest.raises(
224-
adbc_driver_manager_lib.InternalError,
228+
adbc_driver_manager.InternalError,
225229
match=r'Table with name "ingest_table" already exists!',
226230
):
227231
cursor.adbc_ingest("ingest_table", table, "create")
@@ -230,6 +234,7 @@ def test_insertion(duck_conn):
230234
assert cursor.fetch_arrow_table().to_pydict() == {"count_star()": [8]}
231235

232236

237+
@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows")
233238
def test_read(duck_conn):
234239
with duck_conn.cursor() as cursor:
235240
filename = Path(__file__).parent / ".." / "data" / "category.csv"
@@ -299,7 +304,7 @@ def test_large_chunk(tmp_path):
299304
db.unlink()
300305
db_kwargs = {"path": f"{db}"}
301306
with (
302-
adbc_driver_manager.connect(
307+
adbc_driver_manager.dbapi.connect(
303308
driver=driver_path,
304309
entrypoint="duckdb_adbc_init",
305310
db_kwargs=db_kwargs,
@@ -325,7 +330,7 @@ def test_dictionary_data(tmp_path):
325330
db.unlink()
326331
db_kwargs = {"path": f"{db}"}
327332
with (
328-
adbc_driver_manager.connect(
333+
adbc_driver_manager.dbapi.connect(
329334
driver=driver_path,
330335
entrypoint="duckdb_adbc_init",
331336
db_kwargs=db_kwargs,
@@ -353,7 +358,7 @@ def test_ree_data(tmp_path):
353358
db.unlink()
354359
db_kwargs = {"path": f"{db}"}
355360
with (
356-
adbc_driver_manager.connect(
361+
adbc_driver_manager.dbapi.connect(
357362
driver=driver_path,
358363
entrypoint="duckdb_adbc_init",
359364
db_kwargs=db_kwargs,

tests/fast/adbc/test_connection_get_info.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,19 @@
1-
import pytest
1+
import pyarrow as pa
22

3+
import adbc_driver_duckdb.dbapi
34
import duckdb
45

5-
pa = pytest.importorskip("pyarrow")
6-
adbc_driver_manager = pytest.importorskip("adbc_driver_manager")
7-
8-
9-
try:
10-
adbc_driver_duckdb = pytest.importorskip("adbc_driver_duckdb.dbapi")
11-
con = adbc_driver_duckdb.connect()
12-
except adbc_driver_manager.InternalError as e:
13-
pytest.skip(
14-
f"'duckdb_adbc_init' was not exported in this install, try running 'python3 setup.py install': {e}",
15-
allow_module_level=True,
16-
)
17-
186

197
class TestADBCConnectionGetInfo:
208
def test_connection_basic(self):
21-
con = adbc_driver_duckdb.connect()
9+
con = adbc_driver_duckdb.dbapi.connect()
2210
with con.cursor() as cursor:
2311
cursor.execute("select 42")
2412
res = cursor.fetchall()
2513
assert res == [(42,)]
2614

2715
def test_connection_get_info_all(self):
28-
con = adbc_driver_duckdb.connect()
16+
con = adbc_driver_duckdb.dbapi.connect()
2917
adbc_con = con.adbc_connection
3018
res = adbc_con.get_info()
3119
reader = pa.RecordBatchReader._import_from_c(res.address)
@@ -35,7 +23,7 @@ def test_connection_get_info_all(self):
3523
expected_result = pa.array(
3624
[
3725
"duckdb",
38-
"v" + duckdb.__version__, # don't hardcode this, as it will change every version
26+
"v" + duckdb.duckdb_version, # don't hardcode this, as it will change every version
3927
"ADBC DuckDB Driver",
4028
"(unknown)",
4129
"(unknown)",
@@ -49,7 +37,7 @@ def test_connection_get_info_all(self):
4937
assert string_values == expected_result
5038

5139
def test_empty_result(self):
52-
con = adbc_driver_duckdb.connect()
40+
con = adbc_driver_duckdb.dbapi.connect()
5341
adbc_con = con.adbc_connection
5442
res = adbc_con.get_info([1337])
5543
reader = pa.RecordBatchReader._import_from_c(res.address)
@@ -60,7 +48,7 @@ def test_empty_result(self):
6048
assert values.num_chunks == 0
6149

6250
def test_unrecognized_codes(self):
63-
con = adbc_driver_duckdb.connect()
51+
con = adbc_driver_duckdb.dbapi.connect()
6452
adbc_con = con.adbc_connection
6553
res = adbc_con.get_info([0, 1000, 4, 2000])
6654
reader = pa.RecordBatchReader._import_from_c(res.address)

0 commit comments

Comments
 (0)