diff --git a/CMakeLists.txt b/CMakeLists.txt index cec5a697..ab9e1cee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,37 @@ pybind11_add_module( # add _duckdb_dependencies target_link_libraries(_duckdb PRIVATE _duckdb_dependencies) +# ──────────────────────────────────────────── +# Controlling symbol export +# +# We want to export exactly two symbols: - PyInit__duckdb: this allows CPython +# to load the module - duckdb_adbc_init: the DuckDB ADBC driver +# +# The export of symbols on OSX and Linux is controlled by: - Visibility +# annotations in the code (for this lib we use the PYBIND11_EXPORT macro) - +# Telling the linker which symbols we want exported, which we do below +# +# For Windows, we rely on just the visbility annotations. +# ──────────────────────────────────────────── +set_target_properties( + _duckdb + PROPERTIES CXX_VISIBILITY_PRESET hidden + C_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON) + +if(APPLE) + target_link_options( + _duckdb PRIVATE "LINKER:-exported_symbol,_duckdb_adbc_init" + "LINKER:-exported_symbol,_PyInit__duckdb") +elseif(UNIX AND NOT APPLE) + target_link_options( + _duckdb PRIVATE "LINKER:--export-dynamic-symbol=duckdb_adbc_init" + "LINKER:--export-dynamic-symbol=PyInit__duckdb") +elseif(WIN32) + target_link_options(_duckdb PRIVATE "/EXPORT:duckdb_adbc_init" + "/EXPORT:PyInit__duckdb") +endif() + # ──────────────────────────────────────────── # Put the object file in the correct place # ──────────────────────────────────────────── diff --git a/adbc_driver_duckdb/__init__.py b/adbc_driver_duckdb/__init__.py index 528be73f..e81f5090 100644 --- a/adbc_driver_duckdb/__init__.py +++ b/adbc_driver_duckdb/__init__.py @@ -19,12 +19,11 @@ import enum import functools +import importlib import typing import adbc_driver_manager -__all__ = ["StatementOptions", "connect"] - class StatementOptions(enum.Enum): """Statement options specific to the DuckDB driver.""" @@ -36,12 +35,16 @@ class StatementOptions(enum.Enum): def connect(path: typing.Optional[str] = None) -> adbc_driver_manager.AdbcDatabase: """Create a low level ADBC connection to DuckDB.""" if path is None: - return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), entrypoint="duckdb_adbc_init") - return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), entrypoint="duckdb_adbc_init", path=path) + return adbc_driver_manager.AdbcDatabase(driver=driver_path(), entrypoint="duckdb_adbc_init") + return adbc_driver_manager.AdbcDatabase(driver=driver_path(), entrypoint="duckdb_adbc_init", path=path) @functools.cache -def _driver_path() -> str: - import duckdb - - return duckdb.duckdb.__file__ +def driver_path() -> str: + """Get the path to the DuckDB ADBC driver.""" + duckdb_module_spec = importlib.util.find_spec("_duckdb") + if duckdb_module_spec is None: + msg = "Could not find duckdb shared library. Did you pip install duckdb?" + raise ImportError(msg) + print(f"Found duckdb shared library at {duckdb_module_spec.origin}") + return duckdb_module_spec.origin diff --git a/pyproject.toml b/pyproject.toml index 0920e1c2..ece5da98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ all = [ # users can install duckdb with 'duckdb[all]', which will install this l "numpy", # used in duckdb.experimental.spark and in duckdb.fetchnumpy() "pandas", # used for pandas dataframes all over the place "pyarrow", # used for pyarrow support - "adbc_driver_manager", # for the adbc driver (TODO: this should live under the duckdb package) + "adbc-driver-manager", # for the adbc driver ] ###################################################################################################### @@ -77,6 +77,7 @@ metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" [tool.scikit-build.wheel] cmake = true packages.duckdb = "duckdb" +packages.adbc_driver_duckdb = "adbc_driver_duckdb" [tool.scikit-build.cmake.define] CORE_EXTENSIONS = "core_functions;json;parquet;icu;jemalloc" @@ -224,6 +225,7 @@ stubdeps = [ # dependencies used for typehints in the stubs "pyarrow", ] test = [ # dependencies used for running tests + "adbc-driver-manager", "pytest", "pytest-reraise", "pytest-timeout", diff --git a/src/duckdb_py/duckdb_python.cpp b/src/duckdb_py/duckdb_python.cpp index 939fa41a..1dd3ba17 100644 --- a/src/duckdb_py/duckdb_python.cpp +++ b/src/duckdb_py/duckdb_python.cpp @@ -20,6 +20,7 @@ #include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp" #include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp" #include "duckdb/common/enums/statement_type.hpp" +#include "duckdb/common/adbc/adbc-init.hpp" #include "duckdb.hpp" @@ -1007,7 +1008,35 @@ static void RegisterExpectedResultType(py::handle &m) { expected_return_type.export_values(); } +// ###################################################################### +// Symbol exports +// +// We want to limit the symbols we export to only the absolute minimum. +// This means we compile with -fvisibility=hidden to hide all symbols, +// and then explicitly export only the symbols we want. +// +// Right now we export two symbols only: +// - duckdb_adbc_init: the entrypoint for our ADBC driver +// - PyInit__duckdb: the entrypoint for the python extension +// +// All symbols that need exporting must be added to both the list below +// AND to CMakeLists.txt. +extern "C" { +PYBIND11_EXPORT void *_force_symbol_inclusion() { + static void *symbols[] = { + // Add functions to export here + (void *)&duckdb_adbc_init, + }; + return symbols; +} +}; + PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT + // DO NOT REMOVE: the below forces that we include all symbols we want to export + volatile auto *keep_alive = _force_symbol_inclusion(); + (void)keep_alive; + // END + py::enum_(m, "ExplainType") .value("STANDARD", duckdb::ExplainType::EXPLAIN_STANDARD) .value("ANALYZE", duckdb::ExplainType::EXPLAIN_ANALYZE) diff --git a/tests/fast/adbc/test_adbc.py b/tests/fast/adbc/test_adbc.py index 94a86269..80920a99 100644 --- a/tests/fast/adbc/test_adbc.py +++ b/tests/fast/adbc/test_adbc.py @@ -1,24 +1,21 @@ import datetime +import sys from pathlib import Path +import adbc_driver_manager.dbapi import numpy as np +import pyarrow import pytest -import duckdb +import adbc_driver_duckdb.dbapi -adbc_driver_manager = pytest.importorskip("adbc_driver_manager.dbapi") -adbc_driver_manager_lib = pytest.importorskip("adbc_driver_manager._lib") - -pyarrow = pytest.importorskip("pyarrow") - -# When testing local, if you build via BUILD_PYTHON=1 make, you need to manually set up the -# dylib duckdb path. -driver_path = duckdb.duckdb.__file__ +xfail = pytest.mark.xfail +driver_path = adbc_driver_duckdb.driver_path() @pytest.fixture def duck_conn(): - with adbc_driver_manager.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn: + with adbc_driver_manager.dbapi.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn: yield conn @@ -32,7 +29,7 @@ def example_table(): ) -@pytest.mark.xfail +@xfail(sys.platform == "win32", reason="adbc-driver-manager.adbc_get_info() returns an empty dict on windows") def test_connection_get_info(duck_conn): assert duck_conn.adbc_get_info() != {} @@ -45,6 +42,9 @@ def test_connection_get_table_types(duck_conn): assert duck_conn.adbc_get_table_types() == ["BASE TABLE"] +@xfail( + sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows" +) def test_connection_get_objects(duck_conn): with duck_conn.cursor() as cursor: cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") @@ -66,6 +66,9 @@ def test_connection_get_objects(duck_conn): assert depth_all.schema == depth_catalogs.schema +@xfail( + sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows" +) def test_connection_get_objects_filters(duck_conn): with duck_conn.cursor() as cursor: cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)") @@ -98,7 +101,7 @@ def test_commit(tmp_path): table = example_table() db_kwargs = {"path": f"{db}"} # Start connection with auto-commit off - with adbc_driver_manager.connect( + with adbc_driver_manager.dbapi.connect( driver=driver_path, entrypoint="duckdb_adbc_init", db_kwargs=db_kwargs, @@ -108,7 +111,7 @@ def test_commit(tmp_path): cur.adbc_ingest("ingest", table, "create") # Check Data is not there - with adbc_driver_manager.connect( + with adbc_driver_manager.dbapi.connect( driver=driver_path, entrypoint="duckdb_adbc_init", db_kwargs=db_kwargs, @@ -118,7 +121,7 @@ def test_commit(tmp_path): with conn.cursor() as cur: # This errors because the table does not exist with pytest.raises( - adbc_driver_manager_lib.InternalError, + adbc_driver_manager._lib.InternalError, match=r"Table with name ingest does not exist!", ): cur.execute("SELECT count(*) from ingest") @@ -127,7 +130,7 @@ def test_commit(tmp_path): # This now works because we enabled autocommit with ( - adbc_driver_manager.connect( + adbc_driver_manager.dbapi.connect( driver=driver_path, entrypoint="duckdb_adbc_init", db_kwargs=db_kwargs, @@ -204,6 +207,7 @@ def test_statement_query(duck_conn): assert cursor.fetch_arrow_table().to_pylist() == [{"foo": 1}] +@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows") def test_insertion(duck_conn): table = example_table() reader = table.to_reader() @@ -221,7 +225,7 @@ def test_insertion(duck_conn): # Test Append with duck_conn.cursor() as cursor: with pytest.raises( - adbc_driver_manager_lib.InternalError, + adbc_driver_manager.InternalError, match=r'Table with name "ingest_table" already exists!', ): cursor.adbc_ingest("ingest_table", table, "create") @@ -230,6 +234,7 @@ def test_insertion(duck_conn): assert cursor.fetch_arrow_table().to_pydict() == {"count_star()": [8]} +@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows") def test_read(duck_conn): with duck_conn.cursor() as cursor: filename = Path(__file__).parent / ".." / "data" / "category.csv" @@ -299,7 +304,7 @@ def test_large_chunk(tmp_path): db.unlink() db_kwargs = {"path": f"{db}"} with ( - adbc_driver_manager.connect( + adbc_driver_manager.dbapi.connect( driver=driver_path, entrypoint="duckdb_adbc_init", db_kwargs=db_kwargs, @@ -325,7 +330,7 @@ def test_dictionary_data(tmp_path): db.unlink() db_kwargs = {"path": f"{db}"} with ( - adbc_driver_manager.connect( + adbc_driver_manager.dbapi.connect( driver=driver_path, entrypoint="duckdb_adbc_init", db_kwargs=db_kwargs, @@ -353,7 +358,7 @@ def test_ree_data(tmp_path): db.unlink() db_kwargs = {"path": f"{db}"} with ( - adbc_driver_manager.connect( + adbc_driver_manager.dbapi.connect( driver=driver_path, entrypoint="duckdb_adbc_init", db_kwargs=db_kwargs, diff --git a/tests/fast/adbc/test_connection_get_info.py b/tests/fast/adbc/test_connection_get_info.py index 9c2657ca..8bc4b97a 100644 --- a/tests/fast/adbc/test_connection_get_info.py +++ b/tests/fast/adbc/test_connection_get_info.py @@ -1,31 +1,19 @@ -import pytest +import pyarrow as pa +import adbc_driver_duckdb.dbapi import duckdb -pa = pytest.importorskip("pyarrow") -adbc_driver_manager = pytest.importorskip("adbc_driver_manager") - - -try: - adbc_driver_duckdb = pytest.importorskip("adbc_driver_duckdb.dbapi") - con = adbc_driver_duckdb.connect() -except adbc_driver_manager.InternalError as e: - pytest.skip( - f"'duckdb_adbc_init' was not exported in this install, try running 'python3 setup.py install': {e}", - allow_module_level=True, - ) - class TestADBCConnectionGetInfo: def test_connection_basic(self): - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: cursor.execute("select 42") res = cursor.fetchall() assert res == [(42,)] def test_connection_get_info_all(self): - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() adbc_con = con.adbc_connection res = adbc_con.get_info() reader = pa.RecordBatchReader._import_from_c(res.address) @@ -35,7 +23,7 @@ def test_connection_get_info_all(self): expected_result = pa.array( [ "duckdb", - "v" + duckdb.__version__, # don't hardcode this, as it will change every version + "v" + duckdb.duckdb_version, # don't hardcode this, as it will change every version "ADBC DuckDB Driver", "(unknown)", "(unknown)", @@ -49,7 +37,7 @@ def test_connection_get_info_all(self): assert string_values == expected_result def test_empty_result(self): - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() adbc_con = con.adbc_connection res = adbc_con.get_info([1337]) reader = pa.RecordBatchReader._import_from_c(res.address) @@ -60,7 +48,7 @@ def test_empty_result(self): assert values.num_chunks == 0 def test_unrecognized_codes(self): - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() adbc_con = con.adbc_connection res = adbc_con.get_info([0, 1000, 4, 2000]) reader = pa.RecordBatchReader._import_from_c(res.address) diff --git a/tests/fast/adbc/test_statement_bind.py b/tests/fast/adbc/test_statement_bind.py index 9d8ce520..d35693ff 100644 --- a/tests/fast/adbc/test_statement_bind.py +++ b/tests/fast/adbc/test_statement_bind.py @@ -1,10 +1,12 @@ +import sys + +import adbc_driver_manager +import pyarrow as pa import pytest -pa = pytest.importorskip("pyarrow") -adbc_driver_manager = pytest.importorskip("adbc_driver_manager") +import adbc_driver_duckdb.dbapi -adbc_driver_duckdb = pytest.importorskip("adbc_driver_duckdb.dbapi") -con = adbc_driver_duckdb.connect() +xfail = pytest.mark.xfail def _import(handle): @@ -33,7 +35,7 @@ def test_bind_multiple_rows(self): names=["ints"], ) - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? * 2 as i") @@ -55,7 +57,7 @@ def test_bind_single_row(self): names=["ints"], ) - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? * 2 as i") @@ -74,6 +76,7 @@ def test_bind_single_row(self): result_values = result.chunk(0) assert result_values == expected_result + @xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows") def test_multiple_parameters(self): int_data = pa.array([5]) varchar_data = pa.array(["not a short string"]) @@ -90,7 +93,7 @@ def test_multiple_parameters(self): names=["ints", "strings", "bools"], ) - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a, ? as b, ? as c") @@ -120,7 +123,7 @@ def test_bind_composite_type(self): # Create the RecordBatch record_batch = pa.RecordBatch.from_arrays([struct_array], schema=schema) - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a") @@ -143,7 +146,7 @@ def test_too_many_parameters(self): names=["ints", "strings"], ) - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a") @@ -165,13 +168,14 @@ def test_too_many_parameters(self): ): statement.execute_query() + @xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows") def test_not_enough_parameters(self): data = pa.record_batch( [["not a short string"]], names=["strings"], ) - con = adbc_driver_duckdb.connect() + con = adbc_driver_duckdb.dbapi.connect() with con.cursor() as cursor: statement = cursor.adbc_statement statement.set_sql_query("select ? as a, ? as b")