Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/workflows/packaging_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,9 @@ jobs:
env:
CIBW_ARCHS: ${{ matrix.platform.arch == 'amd64' && 'AMD64' || matrix.platform.arch }}
CIBW_BUILD: ${{ matrix.python }}-${{ matrix.platform.cibw_system }}_${{ matrix.platform.arch }}
# PYTHON_GIL=1: Suppresses the RuntimeWarning that the GIL is enabled on free-threaded builds.
# TODO: Remove PYTHON_GIL=1 when free-threaded is supported.
CIBW_ENVIRONMENT: PYTHON_GIL=1
- name: Upload wheel
uses: actions/upload-artifact@v4
with:
name: wheel-${{ matrix.python }}-${{ matrix.platform.cibw_system }}_${{ matrix.platform.arch }}
name: wheel-${{ matrix.python }}-${{ matrix.platform.os }}_${{ matrix.platform.arch }}
path: wheelhouse/*.whl
compression-level: 0
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ test = [ # dependencies used for running tests
"pytest",
"pytest-reraise",
"pytest-timeout",
"pytest-run-parallel",
"mypy",
"coverage",
"gcovr; python_version < '3.14'",
Expand Down
1 change: 1 addition & 0 deletions src/duckdb_py/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ add_library(python_src OBJECT
duckdb_python.cpp
importer.cpp
map.cpp
module_state.cpp
path_like.cpp
pyconnection.cpp
pyexpression.cpp
Expand Down
35 changes: 31 additions & 4 deletions src/duckdb_py/duckdb_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp"
#include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp"
#include "duckdb/common/enums/statement_type.hpp"
#include "duckdb_python/module_state.hpp"

#include "duckdb.hpp"

Expand All @@ -31,6 +32,16 @@ namespace py = pybind11;

namespace duckdb {

// Private function to initialize module state.
//
// Allocates the DuckDBPyModuleState, hands ownership to a capsule stored on the
// module as "__duckdb_state" (so the state is freed when the capsule is
// destroyed), and only then publishes the pointer through SetModuleState().
//
// The ordering is deliberate for failure safety:
//  - if creating the capsule throws, nothing owns the state yet, so it is
//    freed here before the exception propagates;
//  - if attaching the capsule to the module throws, the capsule's destructor
//    frees the state during unwinding, and because the global pointer has not
//    been published yet it can never refer to the freed object.
void InitializeModuleState(py::module_ &m) {
	auto state_ptr = new DuckDBPyModuleState();

	// https://pybind11.readthedocs.io/en/stable/advanced/misc.html#module-destructors
	py::capsule capsule;
	try {
		capsule = py::capsule(state_ptr, [](void *p) { delete static_cast<DuckDBPyModuleState *>(p); });
	} catch (...) {
		// The capsule never took ownership: release the state ourselves
		delete state_ptr;
		throw;
	}
	m.attr("__duckdb_state") = capsule;

	// Publish last, once the module owns the state through the capsule
	SetModuleState(state_ptr);
}

enum PySQLTokenType : uint8_t {
PY_SQL_TOKEN_IDENTIFIER = 0,
PY_SQL_TOKEN_NUMERIC_CONSTANT,
Expand Down Expand Up @@ -1007,7 +1018,22 @@ static void RegisterExpectedResultType(py::handle &m) {
expected_return_type.export_values();
}

PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
// Declare the module as not needing the GIL only on free-threaded CPython 3.14 or later.
// Free-threaded 3.13 builds are deliberately left unsupported.
// The Py_GIL_DISABLED check is not strictly necessary.
#if defined(Py_GIL_DISABLED) && PY_VERSION_HEX >= 0x030e0000
PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m, py::mod_gil_not_used(),
py::multiple_interpreters::not_supported()) { // NOLINT
#else
PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m,
py::multiple_interpreters::not_supported()) { // NOLINT
#endif

// Fully initialize the module state up front, during module initialization.
// PEP 489 expects state to be accessed per module, but for now it is
// reached through the static g_module_state pointer.
InitializeModuleState(m);

py::enum_<duckdb::ExplainType>(m, "ExplainType")
.value("STANDARD", duckdb::ExplainType::EXPLAIN_STANDARD)
.value("ANALYZE", duckdb::ExplainType::EXPLAIN_ANALYZE)
Expand Down Expand Up @@ -1046,9 +1072,10 @@ PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
m.attr("__version__") = std::string(DuckDB::LibraryVersion()).substr(1);
m.attr("__standard_vector_size__") = DuckDB::StandardVectorSize();
m.attr("__git_revision__") = DuckDB::SourceID();
m.attr("__interactive__") = DuckDBPyConnection::DetectAndGetEnvironment();
m.attr("__jupyter__") = DuckDBPyConnection::IsJupyter();
m.attr("__formatted_python_version__") = DuckDBPyConnection::FormattedPythonVersion();
auto &module_state = GetModuleState();
m.attr("__interactive__") = module_state.environment != PythonEnvironmentType::NORMAL;
m.attr("__jupyter__") = module_state.environment == PythonEnvironmentType::JUPYTER;
m.attr("__formatted_python_version__") = module_state.formatted_python_version;
m.def("default_connection", &DuckDBPyConnection::DefaultConnection,
"Retrieve the connection currently registered as the default to be used by the module");
m.def("set_default_connection", &DuckDBPyConnection::SetDefaultConnection,
Expand Down
63 changes: 63 additions & 0 deletions src/duckdb_py/include/duckdb_python/module_state.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/module_state.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb/common/shared_ptr.hpp"
#include "duckdb/main/db_instance_cache.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb_python/import_cache/python_import_cache.hpp"
#include "duckdb_python/pyconnection/pyconnection.hpp"
#include <pybind11/critical_section.h>

namespace duckdb {

// State owned by the Python module: the detected Python environment, the
// default connection, and the import / database-instance caches.
// Intended to become per-interpreter state; for now a single instance is
// reachable through the static pointer declared at the bottom.
struct DuckDBPyModuleState {
	// Kind of Python environment we are running in (NORMAL unless the constructor detects otherwise)
	PythonEnvironmentType environment = PythonEnvironmentType::NORMAL;
	// Python version string, filled in by the constructor
	string formatted_python_version;

	DuckDBPyModuleState();

	// Non-copyable
	DuckDBPyModuleState(const DuckDBPyModuleState &) = delete;
	DuckDBPyModuleState &operator=(const DuckDBPyModuleState &) = delete;

	// Access to the single, statically registered module state
	static void SetGlobalModuleState(DuckDBPyModuleState *state);
	static DuckDBPyModuleState &GetGlobalModuleState();

	// Default connection management
	void SetDefaultConnection(shared_ptr<DuckDBPyConnection> connection);
	shared_ptr<DuckDBPyConnection> GetDefaultConnection();
	void ClearDefaultConnection();

	// Cache of frequently used Python modules
	PythonImportCache *GetImportCache();
	void ClearImportCache();

	// Cache of database instances
	DBInstanceCache *GetInstanceCache();

private:
	shared_ptr<DuckDBPyConnection> default_connection_ptr;
	PythonImportCache import_cache;
	DBInstanceCache instance_cache;
#ifdef Py_GIL_DISABLED
	// Object the critical sections around the default connection lock on
	py::object default_con_lock;
#endif

	// Kept static as a first step towards PEP 489 / multi-phase init.
	// The goal is a per-module object, but the frequent calls into
	// import_cache need to be considered carefully before moving it.
	// TODO: Replace with non-static per-interpreter state for multi-interpreter support
	static DuckDBPyModuleState *g_module_state;
};

// Returns the module state previously registered through SetModuleState().
DuckDBPyModuleState &GetModuleState();
// Registers `state` as the module state handed out by GetModuleState().
void SetModuleState(DuckDBPyModuleState *state);

} // namespace duckdb
21 changes: 10 additions & 11 deletions src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

namespace duckdb {
struct BoundParameterData;
struct DuckDBPyModuleState;

enum class PythonEnvironmentType { NORMAL, INTERACTIVE, JUPYTER };

Expand Down Expand Up @@ -172,8 +173,7 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
case_insensitive_set_t registered_objects;

public:
explicit DuckDBPyConnection() {
}
DuckDBPyConnection();
~DuckDBPyConnection();

public:
Expand All @@ -190,9 +190,17 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
static std::string FormattedPythonVersion();
static shared_ptr<DuckDBPyConnection> DefaultConnection();
static void SetDefaultConnection(shared_ptr<DuckDBPyConnection> conn);
static shared_ptr<DuckDBPyConnection> GetDefaultConnection();
static void ClearDefaultConnection();
static void ClearImportCache();
static PythonImportCache *ImportCache();
static bool IsInteractive();

// Instance methods for optimized module state access
bool IsJupyterInstance() const;
bool IsInteractiveInstance() const;
std::string FormattedPythonVersionInstance() const;

unique_ptr<DuckDBPyRelation> ReadCSV(const py::object &name, py::kwargs &kwargs);

py::list ExtractStatements(const string &query);
Expand Down Expand Up @@ -337,11 +345,6 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
py::list ListFilesystems();
bool FileSystemIsRegistered(const string &name);

//! Default connection to an in-memory database
static DefaultConnectionHolder default_connection;
//! Caches and provides an interface to get frequently used modules+subtypes
static shared_ptr<PythonImportCache> import_cache;

static bool IsPandasDataframe(const py::object &object);
static PyArrowObjectType GetArrowType(const py::handle &obj);
static bool IsAcceptedArrowObject(const py::object &object);
Expand All @@ -357,10 +360,6 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
bool side_effects);
void RegisterArrowObject(const py::object &arrow_object, const string &name);
vector<unique_ptr<SQLStatement>> GetStatements(const py::object &query);

static PythonEnvironmentType environment;
static std::string formatted_python_version;
static void DetectEnvironment();
};

template <typename T>
Expand Down
128 changes: 128 additions & 0 deletions src/duckdb_py/module_state.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/module_state.cpp
//
//
//===----------------------------------------------------------------------===//

#include "duckdb_python/module_state.hpp"
#include <stdexcept>
#include <chrono>
#include <thread>

#define DEBUG_MODULE_STATE 0

namespace duckdb {

// Forward declaration from pyconnection.cpp
void InstantiateNewInstance(DuckDB &db);

// Static member initialization - required for all static class members in C++
DuckDBPyModuleState *DuckDBPyModuleState::g_module_state = nullptr;

DuckDBPyModuleState::DuckDBPyModuleState() {
	// The caches are direct members, so there is nothing to heap-allocate for them here

#ifdef Py_GIL_DISABLED
	// Object that the critical sections lock on
	// TODO: Consider moving to finer-grained locks
	default_con_lock = py::none();
#endif

	// Detect the Python version and environment during initialization
	// (moved here from DuckDBPyConnection::DetectEnvironment())
	py::module_ sys_module = py::module_::import("sys");
	py::object py_version = sys_module.attr("version_info");
	int major_version = py::cast<int>(py_version.attr("major"));
	int minor_version = py::cast<int>(py_version.attr("minor"));
	formatted_python_version = std::to_string(major_version) + "." + std::to_string(minor_version);

	// A __main__ module with a __file__ attribute means we are not interactive: keep the default
	auto main_module = py::module_::import("__main__");
	if (py::hasattr(main_module, "__file__")) {
		return;
	}
	environment = PythonEnvironmentType::INTERACTIVE;

	// The Jupyter Notebook check below only applies when IPython is loaded
	if (!ModuleIsLoaded<IpythonCacheItem>()) {
		return;
	}
	auto get_ipython = import_cache.IPython.get_ipython();
	if (get_ipython.ptr() == nullptr) {
		return;
	}
	auto shell = get_ipython();
	if (!py::hasattr(shell, "config")) {
		return;
	}
	// An "IPKernelApp" entry in the IPython config is treated as running under Jupyter
	py::dict shell_config = shell.attr("config");
	if (shell_config.contains("IPKernelApp")) {
		environment = PythonEnvironmentType::JUPYTER;
	}
}

// Returns the statically registered module state.
// Throws InternalException when no state has been registered yet.
DuckDBPyModuleState &DuckDBPyModuleState::GetGlobalModuleState() {
	// TODO: Externalize this static cache when adding multi-interpreter support
	// Until then the single-interpreter assumption makes a plain static pointer sufficient
	if (g_module_state != nullptr) {
		return *g_module_state;
	}
	throw InternalException("Module state not initialized - call SetGlobalModuleState() during module init");
}

// Stores `state` in the static g_module_state pointer that
// GetGlobalModuleState() reads. Only the pointer is stored.
void DuckDBPyModuleState::SetGlobalModuleState(DuckDBPyModuleState *state) {
#if DEBUG_MODULE_STATE
// Debug-only trace, compiled in when DEBUG_MODULE_STATE is non-zero
printf("DEBUG: SetGlobalModuleState() called - initializing static cache (built: %s %s)\n", __DATE__, __TIME__);
#endif
g_module_state = state;
}

// Free-function shorthand for DuckDBPyModuleState::GetGlobalModuleState().
DuckDBPyModuleState &GetModuleState() {
#if DEBUG_MODULE_STATE
// Debug-only trace, compiled in when DEBUG_MODULE_STATE is non-zero
printf("DEBUG: GetModuleState() called\n");
#endif
return DuckDBPyModuleState::GetGlobalModuleState();
}

// Free-function shorthand for DuckDBPyModuleState::SetGlobalModuleState().
void SetModuleState(DuckDBPyModuleState *state) {
DuckDBPyModuleState::SetGlobalModuleState(state);
}

// Returns the default connection, first (re)creating it as a ":memory:"
// connection when there is none or the current one has been closed.
shared_ptr<DuckDBPyConnection> DuckDBPyModuleState::GetDefaultConnection() {
#if defined(Py_GIL_DISABLED)
	// TODO: Consider whether a mutex vs a scoped_critical_section
	py::scoped_critical_section guard(default_con_lock);
#endif
	// Same decision as the original DefaultConnectionHolder::Get(): reuse only an open connection
	const bool has_open_connection = default_connection_ptr && !default_connection_ptr->con.ConnectionIsClosed();
	if (has_open_connection) {
		return default_connection_ptr;
	}
	py::dict config_dict;
	default_connection_ptr = DuckDBPyConnection::Connect(py::str(":memory:"), false, config_dict);
	return default_connection_ptr;
}

// Replaces the default connection with `connection`.
void DuckDBPyModuleState::SetDefaultConnection(shared_ptr<DuckDBPyConnection> connection) {
#if defined(Py_GIL_DISABLED)
// Same lock object as the other default-connection accessors
py::scoped_critical_section guard(default_con_lock);
#endif
default_connection_ptr = std::move(connection);
}

// Drops this state's reference to the default connection.
void DuckDBPyModuleState::ClearDefaultConnection() {
#if defined(Py_GIL_DISABLED)
// Same lock object as the other default-connection accessors
py::scoped_critical_section guard(default_con_lock);
#endif
default_connection_ptr = nullptr;
}

// Returns a pointer to the import cache member of this state object.
PythonImportCache *DuckDBPyModuleState::GetImportCache() {
return &import_cache;
}

// Resets the import cache by assigning a freshly constructed PythonImportCache over it.
void DuckDBPyModuleState::ClearImportCache() {
import_cache = PythonImportCache();
}

// Returns a pointer to the database instance cache member of this state object.
DBInstanceCache *DuckDBPyModuleState::GetInstanceCache() {
return &instance_cache;
}

} // namespace duckdb
Loading