Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,37 @@ pybind11_add_module(
# add _duckdb_dependencies
target_link_libraries(_duckdb PRIVATE _duckdb_dependencies)

# ────────────────────────────────────────────
# Controlling symbol export
#
# We want to export exactly two symbols: - PyInit__duckdb: this allows CPython
# to load the module - duckdb_adbc_init: the DuckDB ADBC driver
#
# The export of symbols on OSX and Linux is controlled by: - Visibility
# annotations in the code (for this lib we use the PYBIND11_EXPORT macro) -
# Telling the linker which symbols we want exported, which we do below
#
# For Windows, we rely on just the visbility annotations.
# ────────────────────────────────────────────
set_target_properties(
_duckdb
PROPERTIES CXX_VISIBILITY_PRESET hidden
C_VISIBILITY_PRESET hidden
VISIBILITY_INLINES_HIDDEN ON)

if(APPLE)
target_link_options(
_duckdb PRIVATE "LINKER:-exported_symbol,_duckdb_adbc_init"
"LINKER:-exported_symbol,_PyInit__duckdb")
elseif(UNIX AND NOT APPLE)
target_link_options(
_duckdb PRIVATE "LINKER:--export-dynamic-symbol=duckdb_adbc_init"
"LINKER:--export-dynamic-symbol=PyInit__duckdb")
elseif(WIN32)
target_link_options(_duckdb PRIVATE "/EXPORT:duckdb_adbc_init"
"/EXPORT:PyInit__duckdb")
endif()

# ────────────────────────────────────────────
# Put the object file in the correct place
# ────────────────────────────────────────────
Expand Down
19 changes: 11 additions & 8 deletions adbc_driver_duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@

import enum
import functools
import importlib
import typing

import adbc_driver_manager

__all__ = ["StatementOptions", "connect"]


class StatementOptions(enum.Enum):
"""Statement options specific to the DuckDB driver."""
Expand All @@ -36,12 +35,16 @@ class StatementOptions(enum.Enum):
def connect(path: typing.Optional[str] = None) -> adbc_driver_manager.AdbcDatabase:
"""Create a low level ADBC connection to DuckDB."""
if path is None:
return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), entrypoint="duckdb_adbc_init")
return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), entrypoint="duckdb_adbc_init", path=path)
return adbc_driver_manager.AdbcDatabase(driver=driver_path(), entrypoint="duckdb_adbc_init")
return adbc_driver_manager.AdbcDatabase(driver=driver_path(), entrypoint="duckdb_adbc_init", path=path)


@functools.cache
def _driver_path() -> str:
import duckdb

return duckdb.duckdb.__file__
def driver_path() -> str:
"""Get the path to the DuckDB ADBC driver."""
duckdb_module_spec = importlib.util.find_spec("_duckdb")
if duckdb_module_spec is None:
msg = "Could not find duckdb shared library. Did you pip install duckdb?"
raise ImportError(msg)
print(f"Found duckdb shared library at {duckdb_module_spec.origin}")
return duckdb_module_spec.origin
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ all = [ # users can install duckdb with 'duckdb[all]', which will install this l
"numpy", # used in duckdb.experimental.spark and in duckdb.fetchnumpy()
"pandas", # used for pandas dataframes all over the place
"pyarrow", # used for pyarrow support
"adbc_driver_manager", # for the adbc driver (TODO: this should live under the duckdb package)
"adbc-driver-manager", # for the adbc driver
]

######################################################################################################
Expand Down Expand Up @@ -77,6 +77,7 @@ metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
[tool.scikit-build.wheel]
cmake = true
packages.duckdb = "duckdb"
packages.adbc_driver_duckdb = "adbc_driver_duckdb"

[tool.scikit-build.cmake.define]
CORE_EXTENSIONS = "core_functions;json;parquet;icu;jemalloc"
Expand Down Expand Up @@ -224,6 +225,7 @@ stubdeps = [ # dependencies used for typehints in the stubs
"pyarrow",
]
test = [ # dependencies used for running tests
"adbc-driver-manager",
"pytest",
"pytest-reraise",
"pytest-timeout",
Expand Down
29 changes: 29 additions & 0 deletions src/duckdb_py/duckdb_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp"
#include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp"
#include "duckdb/common/enums/statement_type.hpp"
#include "duckdb/common/adbc/adbc-init.hpp"

#include "duckdb.hpp"

Expand Down Expand Up @@ -1007,7 +1008,35 @@ static void RegisterExpectedResultType(py::handle &m) {
expected_return_type.export_values();
}

// ######################################################################
// Symbol exports
//
// We want to limit the symbols we export to only the absolute minimum.
// This means we compile with -fvisibility=hidden to hide all symbols,
// and then explicitly export only the symbols we want.
//
// Right now we export two symbols only:
// - duckdb_adbc_init: the entrypoint for our ADBC driver
// - PyInit__duckdb: the entrypoint for the python extension
//
// All symbols that need exporting must be added to both the list below
// AND to CMakeLists.txt.
extern "C" {
PYBIND11_EXPORT void *_force_symbol_inclusion() {
static void *symbols[] = {
// Add functions to export here
(void *)&duckdb_adbc_init,
};
return symbols;
}
};

PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
// DO NOT REMOVE: the below forces that we include all symbols we want to export
volatile auto *keep_alive = _force_symbol_inclusion();
(void)keep_alive;
// END

py::enum_<duckdb::ExplainType>(m, "ExplainType")
.value("STANDARD", duckdb::ExplainType::EXPLAIN_STANDARD)
.value("ANALYZE", duckdb::ExplainType::EXPLAIN_ANALYZE)
Expand Down
43 changes: 24 additions & 19 deletions tests/fast/adbc/test_adbc.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
import datetime
import sys
from pathlib import Path

import adbc_driver_manager.dbapi
import numpy as np
import pyarrow
import pytest

import duckdb
import adbc_driver_duckdb.dbapi

adbc_driver_manager = pytest.importorskip("adbc_driver_manager.dbapi")
adbc_driver_manager_lib = pytest.importorskip("adbc_driver_manager._lib")

pyarrow = pytest.importorskip("pyarrow")

# When testing local, if you build via BUILD_PYTHON=1 make, you need to manually set up the
# dylib duckdb path.
driver_path = duckdb.duckdb.__file__
xfail = pytest.mark.xfail
driver_path = adbc_driver_duckdb.driver_path()


@pytest.fixture
def duck_conn():
with adbc_driver_manager.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn:
with adbc_driver_manager.dbapi.connect(driver=driver_path, entrypoint="duckdb_adbc_init") as conn:
yield conn


Expand All @@ -32,7 +29,7 @@ def example_table():
)


@pytest.mark.xfail
@xfail(sys.platform == "win32", reason="adbc-driver-manager.adbc_get_info() returns an empty dict on windows")
def test_connection_get_info(duck_conn):
assert duck_conn.adbc_get_info() != {}

Expand All @@ -45,6 +42,9 @@ def test_connection_get_table_types(duck_conn):
assert duck_conn.adbc_get_table_types() == ["BASE TABLE"]


@xfail(
sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows"
)
def test_connection_get_objects(duck_conn):
with duck_conn.cursor() as cursor:
cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)")
Expand All @@ -66,6 +66,9 @@ def test_connection_get_objects(duck_conn):
assert depth_all.schema == depth_catalogs.schema


@xfail(
sys.platform == "win32", reason="adbc-driver-manager.adbc_get_objects() returns an invalid schema dict on windows"
)
def test_connection_get_objects_filters(duck_conn):
with duck_conn.cursor() as cursor:
cursor.execute("CREATE TABLE getobjects (ints BIGINT PRIMARY KEY)")
Expand Down Expand Up @@ -98,7 +101,7 @@ def test_commit(tmp_path):
table = example_table()
db_kwargs = {"path": f"{db}"}
# Start connection with auto-commit off
with adbc_driver_manager.connect(
with adbc_driver_manager.dbapi.connect(
driver=driver_path,
entrypoint="duckdb_adbc_init",
db_kwargs=db_kwargs,
Expand All @@ -108,7 +111,7 @@ def test_commit(tmp_path):
cur.adbc_ingest("ingest", table, "create")

# Check Data is not there
with adbc_driver_manager.connect(
with adbc_driver_manager.dbapi.connect(
driver=driver_path,
entrypoint="duckdb_adbc_init",
db_kwargs=db_kwargs,
Expand All @@ -118,7 +121,7 @@ def test_commit(tmp_path):
with conn.cursor() as cur:
# This errors because the table does not exist
with pytest.raises(
adbc_driver_manager_lib.InternalError,
adbc_driver_manager._lib.InternalError,
match=r"Table with name ingest does not exist!",
):
cur.execute("SELECT count(*) from ingest")
Expand All @@ -127,7 +130,7 @@ def test_commit(tmp_path):

# This now works because we enabled autocommit
with (
adbc_driver_manager.connect(
adbc_driver_manager.dbapi.connect(
driver=driver_path,
entrypoint="duckdb_adbc_init",
db_kwargs=db_kwargs,
Expand Down Expand Up @@ -204,6 +207,7 @@ def test_statement_query(duck_conn):
assert cursor.fetch_arrow_table().to_pylist() == [{"foo": 1}]


@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows")
def test_insertion(duck_conn):
table = example_table()
reader = table.to_reader()
Expand All @@ -221,7 +225,7 @@ def test_insertion(duck_conn):
# Test Append
with duck_conn.cursor() as cursor:
with pytest.raises(
adbc_driver_manager_lib.InternalError,
adbc_driver_manager.InternalError,
match=r'Table with name "ingest_table" already exists!',
):
cursor.adbc_ingest("ingest_table", table, "create")
Expand All @@ -230,6 +234,7 @@ def test_insertion(duck_conn):
assert cursor.fetch_arrow_table().to_pydict() == {"count_star()": [8]}


@xfail(sys.platform == "win32", reason="adbc-driver-manager returns an invalid table schema on windows")
def test_read(duck_conn):
with duck_conn.cursor() as cursor:
filename = Path(__file__).parent / ".." / "data" / "category.csv"
Expand Down Expand Up @@ -299,7 +304,7 @@ def test_large_chunk(tmp_path):
db.unlink()
db_kwargs = {"path": f"{db}"}
with (
adbc_driver_manager.connect(
adbc_driver_manager.dbapi.connect(
driver=driver_path,
entrypoint="duckdb_adbc_init",
db_kwargs=db_kwargs,
Expand All @@ -325,7 +330,7 @@ def test_dictionary_data(tmp_path):
db.unlink()
db_kwargs = {"path": f"{db}"}
with (
adbc_driver_manager.connect(
adbc_driver_manager.dbapi.connect(
driver=driver_path,
entrypoint="duckdb_adbc_init",
db_kwargs=db_kwargs,
Expand Down Expand Up @@ -353,7 +358,7 @@ def test_ree_data(tmp_path):
db.unlink()
db_kwargs = {"path": f"{db}"}
with (
adbc_driver_manager.connect(
adbc_driver_manager.dbapi.connect(
driver=driver_path,
entrypoint="duckdb_adbc_init",
db_kwargs=db_kwargs,
Expand Down
26 changes: 7 additions & 19 deletions tests/fast/adbc/test_connection_get_info.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,19 @@
import pytest
import pyarrow as pa

import adbc_driver_duckdb.dbapi
import duckdb

pa = pytest.importorskip("pyarrow")
adbc_driver_manager = pytest.importorskip("adbc_driver_manager")


try:
adbc_driver_duckdb = pytest.importorskip("adbc_driver_duckdb.dbapi")
con = adbc_driver_duckdb.connect()
except adbc_driver_manager.InternalError as e:
pytest.skip(
f"'duckdb_adbc_init' was not exported in this install, try running 'python3 setup.py install': {e}",
allow_module_level=True,
)


class TestADBCConnectionGetInfo:
def test_connection_basic(self):
con = adbc_driver_duckdb.connect()
con = adbc_driver_duckdb.dbapi.connect()
with con.cursor() as cursor:
cursor.execute("select 42")
res = cursor.fetchall()
assert res == [(42,)]

def test_connection_get_info_all(self):
con = adbc_driver_duckdb.connect()
con = adbc_driver_duckdb.dbapi.connect()
adbc_con = con.adbc_connection
res = adbc_con.get_info()
reader = pa.RecordBatchReader._import_from_c(res.address)
Expand All @@ -35,7 +23,7 @@ def test_connection_get_info_all(self):
expected_result = pa.array(
[
"duckdb",
"v" + duckdb.__version__, # don't hardcode this, as it will change every version
"v" + duckdb.duckdb_version, # don't hardcode this, as it will change every version
"ADBC DuckDB Driver",
"(unknown)",
"(unknown)",
Expand All @@ -49,7 +37,7 @@ def test_connection_get_info_all(self):
assert string_values == expected_result

def test_empty_result(self):
con = adbc_driver_duckdb.connect()
con = adbc_driver_duckdb.dbapi.connect()
adbc_con = con.adbc_connection
res = adbc_con.get_info([1337])
reader = pa.RecordBatchReader._import_from_c(res.address)
Expand All @@ -60,7 +48,7 @@ def test_empty_result(self):
assert values.num_chunks == 0

def test_unrecognized_codes(self):
con = adbc_driver_duckdb.connect()
con = adbc_driver_duckdb.dbapi.connect()
adbc_con = con.adbc_connection
res = adbc_con.get_info([0, 1000, 4, 2000])
reader = pa.RecordBatchReader._import_from_c(res.address)
Expand Down
Loading