Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/duckdb_py/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ add_library(python_src OBJECT
duckdb_python.cpp
importer.cpp
map.cpp
module_state.cpp
path_like.cpp
pyconnection.cpp
pyexpression.cpp
Expand Down
26 changes: 22 additions & 4 deletions src/duckdb_py/duckdb_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "duckdb_python/pybind11/conversions/python_udf_type_enum.hpp"
#include "duckdb_python/pybind11/conversions/python_csv_line_terminator_enum.hpp"
#include "duckdb/common/enums/statement_type.hpp"
#include "duckdb_python/module_state.hpp"

#include "duckdb.hpp"

Expand All @@ -31,6 +32,16 @@ namespace py = pybind11;

namespace duckdb {

//! Creates the module state, hands its ownership to a capsule stored on the module, and only then
//! publishes it through SetModuleState().
//!
//! \param m The module being initialized; receives the owning capsule as attribute "__duckdb_state".
//!
//! The state is only published after the capsule has been attached, so a failure in any earlier step
//! never leaves the registered pointer referring to a leaked or already deleted state.
//!
//! NOTE(review): the capsule destructor deletes the state, but the pointer registered through
//! SetModuleState() is not cleared at that point - confirm nothing calls GetModuleState() after the
//! module has been torn down.
void InitializeModuleState(py::module_ &m) {
	// Keep ownership in a unique_ptr until the capsule exists, so a failure while creating the
	// capsule cannot leak the state.
	auto state = make_uniq<DuckDBPyModuleState>();

	// https://pybind11.readthedocs.io/en/stable/advanced/misc.html#module-destructors
	auto owner = py::capsule(state.get(), [](void *p) { delete static_cast<DuckDBPyModuleState *>(p); });
	// From here on the capsule's destructor is responsible for deleting the state.
	auto *published = state.release();
	m.attr("__duckdb_state") = owner;

	// Publish last: if attaching the capsule had thrown, the capsule would already have freed the state.
	SetModuleState(published);
}

enum PySQLTokenType : uint8_t {
PY_SQL_TOKEN_IDENTIFIER = 0,
PY_SQL_TOKEN_NUMERIC_CONSTANT,
Expand Down Expand Up @@ -1007,7 +1018,13 @@ static void RegisterExpectedResultType(py::handle &m) {
expected_return_type.export_values();
}

PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m,
py::multiple_interpreters::not_supported()) { // NOLINT
// Initialize the module state before any bindings are registered.
// PEP 489 expects module state to be stored per module object, but for now
// it is held in the static g_module_state.
InitializeModuleState(m);

py::enum_<duckdb::ExplainType>(m, "ExplainType")
.value("STANDARD", duckdb::ExplainType::EXPLAIN_STANDARD)
.value("ANALYZE", duckdb::ExplainType::EXPLAIN_ANALYZE)
Expand Down Expand Up @@ -1046,9 +1063,10 @@ PYBIND11_MODULE(DUCKDB_PYTHON_LIB_NAME, m) { // NOLINT
m.attr("__version__") = std::string(DuckDB::LibraryVersion()).substr(1);
m.attr("__standard_vector_size__") = DuckDB::StandardVectorSize();
m.attr("__git_revision__") = DuckDB::SourceID();
m.attr("__interactive__") = DuckDBPyConnection::DetectAndGetEnvironment();
m.attr("__jupyter__") = DuckDBPyConnection::IsJupyter();
m.attr("__formatted_python_version__") = DuckDBPyConnection::FormattedPythonVersion();
auto &module_state = GetModuleState();
m.attr("__interactive__") = module_state.environment != PythonEnvironmentType::NORMAL;
m.attr("__jupyter__") = module_state.environment == PythonEnvironmentType::JUPYTER;
m.attr("__formatted_python_version__") = module_state.formatted_python_version;
m.def("default_connection", &DuckDBPyConnection::DefaultConnection,
"Retrieve the connection currently registered as the default to be used by the module");
m.def("set_default_connection", &DuckDBPyConnection::SetDefaultConnection,
Expand Down
50 changes: 50 additions & 0 deletions src/duckdb_py/include/duckdb_python/module_state.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/module_state.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb_python/pybind11/pybind_wrapper.hpp"
#include "duckdb/common/shared_ptr.hpp"
#include "duckdb/main/db_instance_cache.hpp"
#include "duckdb_python/import_cache/python_import_cache.hpp"
#include "duckdb_python/pyconnection/pyconnection.hpp"

namespace duckdb {

// Module state structure to hold per-interpreter state
//! State belonging to one import of the DuckDB Python module (intended to become per-interpreter state)
struct DuckDBPyModuleState {
public:
	DuckDBPyModuleState();

	//! The module state cannot be copied
	DuckDBPyModuleState(const DuckDBPyModuleState &) = delete;
	DuckDBPyModuleState &operator=(const DuckDBPyModuleState &) = delete;

public:
	//! Default connection access, wrapped so that free-threading control can be added in one place later
	shared_ptr<DuckDBPyConnection> GetDefaultConnection();
	void SetDefaultConnection(shared_ptr<DuckDBPyConnection> connection);
	void ClearDefaultConnection();

	//! Import cache access, wrapped so that free-threading control can be added in one place later
	PythonImportCache *GetImportCache();
	void ResetImportCache();

public:
	//! Holder of the default connection
	DefaultConnectionHolder default_connection;
	//! Cache of imported Python modules
	shared_ptr<PythonImportCache> import_cache;
	//! Cache of database instances
	std::unique_ptr<DBInstanceCache> instance_cache;

	//! Kind of Python environment the module runs in; NORMAL unless detected otherwise
	PythonEnvironmentType environment = PythonEnvironmentType::NORMAL;
	//! Version of the running Python interpreter, formatted as text
	string formatted_python_version;
};

//! Returns the module state registered through SetModuleState().
//! The definition in module_state.cpp throws an InternalException when no state has been registered.
DuckDBPyModuleState &GetModuleState();
//! Registers `state` as the state returned by GetModuleState().
//! Only the pointer is stored; ownership stays with the caller.
void SetModuleState(DuckDBPyModuleState *state);

} // namespace duckdb
19 changes: 9 additions & 10 deletions src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

namespace duckdb {
struct BoundParameterData;
struct DuckDBPyModuleState;

//! Kind of Python session the module is running in.
//! NOTE(review): as assigned by the DuckDBPyModuleState constructor, INTERACTIVE means __main__ has no
//! __file__ attribute and JUPYTER additionally means the IPython config contains "IPKernelApp" -
//! confirm there are no other places that assign these values.
enum class PythonEnvironmentType { NORMAL, INTERACTIVE, JUPYTER };

Expand Down Expand Up @@ -172,8 +173,7 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
case_insensitive_set_t registered_objects;

public:
explicit DuckDBPyConnection() {
}
DuckDBPyConnection();
~DuckDBPyConnection();

public:
Expand All @@ -191,8 +191,15 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
static shared_ptr<DuckDBPyConnection> DefaultConnection();
static void SetDefaultConnection(shared_ptr<DuckDBPyConnection> conn);
static PythonImportCache *ImportCache();
// Instance method for fast import cache access using cached module state
PythonImportCache *GetImportCache();
static bool IsInteractive();

// Instance methods for optimized module state access
bool IsJupyterInstance() const;
bool IsInteractiveInstance() const;
std::string FormattedPythonVersionInstance() const;

unique_ptr<DuckDBPyRelation> ReadCSV(const py::object &name, py::kwargs &kwargs);

py::list ExtractStatements(const string &query);
Expand Down Expand Up @@ -337,11 +344,6 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
py::list ListFilesystems();
bool FileSystemIsRegistered(const string &name);

//! Default connection to an in-memory database
static DefaultConnectionHolder default_connection;
//! Caches and provides an interface to get frequently used modules+subtypes
static shared_ptr<PythonImportCache> import_cache;

static bool IsPandasDataframe(const py::object &object);
static PyArrowObjectType GetArrowType(const py::handle &obj);
static bool IsAcceptedArrowObject(const py::object &object);
Expand All @@ -358,9 +360,6 @@ struct DuckDBPyConnection : public enable_shared_from_this<DuckDBPyConnection> {
void RegisterArrowObject(const py::object &arrow_object, const string &name);
vector<unique_ptr<SQLStatement>> GetStatements(const py::object &query);

static PythonEnvironmentType environment;
static std::string formatted_python_version;
static void DetectEnvironment();
};

template <typename T>
Expand Down
89 changes: 89 additions & 0 deletions src/duckdb_py/module_state.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb_python/module_state.cpp
//
//
//===----------------------------------------------------------------------===//

#include "duckdb_python/module_state.hpp"
#include <stdexcept>

namespace duckdb {

// TODO: Make non-static.
// Kept as a file-local static for now: it gives the scope needed to hand out
// the import cache efficiently, without expensive lookups.
// Null until SetModuleState() stores a state.
static DuckDBPyModuleState *g_module_state = nullptr;

//! Builds the module state: creates the instance and import caches, records the Python version and
//! classifies the environment (NORMAL by default, otherwise INTERACTIVE or JUPYTER).
//!
//! NOTE(review): InitializeModuleState() only calls SetModuleState() after this constructor has returned -
//! confirm that the IPython lookup below never reaches GetModuleState().
DuckDBPyModuleState::DuckDBPyModuleState() {
	instance_cache = make_uniq<DBInstanceCache>();
	import_cache = make_shared_ptr<PythonImportCache>();

	// Record the interpreter version as "<major>.<minor>"
	// (detection logic moved here from DuckDBPyConnection::DetectEnvironment())
	py::object py_version = py::module_::import("sys").attr("version_info");
	const int py_major = py::cast<int>(py_version.attr("major"));
	const int py_minor = py::cast<int>(py_version.attr("minor"));
	formatted_python_version = std::to_string(py_major) + "." + std::to_string(py_minor);

	// A __main__ module with a __file__ attribute means we are not in interactive mode
	auto entry_module = py::module_::import("__main__");
	if (py::hasattr(entry_module, "__file__")) {
		return;
	}
	environment = PythonEnvironmentType::INTERACTIVE;

	// Only probe for a Jupyter Notebook when ModuleIsLoaded reports IPython
	if (!ModuleIsLoaded<IpythonCacheItem>()) {
		return;
	}
	auto ipython_getter = import_cache->IPython.get_ipython();
	if (ipython_getter.ptr() == nullptr) {
		return;
	}
	auto shell = ipython_getter();
	if (!py::hasattr(shell, "config")) {
		return;
	}
	py::dict shell_config = shell.attr("config");
	if (shell_config.contains("IPKernelApp")) {
		environment = PythonEnvironmentType::JUPYTER;
	}
}

//! Returns the module state registered via SetModuleState().
//! Throws an InternalException when no state has been registered.
DuckDBPyModuleState &GetModuleState() {
	// TODO: Externalize this static cache when adding multi-interpreter support.
	// Until then a single interpreter is assumed, which keeps the static cache simple.
	if (g_module_state) {
		return *g_module_state;
	}
	throw InternalException("Module state not initialized - call SetModuleState() during module init");
}

//! Registers `state` as the module state returned by GetModuleState().
//! Only the pointer is stored; ownership stays with the caller.
//! Passing nullptr makes GetModuleState() throw again.
void SetModuleState(DuckDBPyModuleState *state) {
	// No diagnostic output here: this runs during module initialization and must not write to stdout.
	g_module_state = state;
}

//! Returns whatever the default connection holder hands out through Get().
//! NOTE(review): whether Get() can create or replace the connection depends on DefaultConnectionHolder - confirm.
shared_ptr<DuckDBPyConnection> DuckDBPyModuleState::GetDefaultConnection() {
return default_connection.Get();
}

//! Stores `connection` in the default connection holder.
//! The argument is taken by value and moved into the holder.
void DuckDBPyModuleState::SetDefaultConnection(shared_ptr<DuckDBPyConnection> connection) {
default_connection.Set(std::move(connection));
}

//! Clears the default connection by storing nullptr in the holder.
void DuckDBPyModuleState::ClearDefaultConnection() {
default_connection.Set(nullptr);
}

//! Returns a non-owning pointer to the import cache held by this state.
//! The pointer is whatever import_cache.get() yields, so it is null once ResetImportCache() has been called.
PythonImportCache* DuckDBPyModuleState::GetImportCache() {
return import_cache.get();
}

//! Drops this state's reference to the import cache; no new cache is created here.
void DuckDBPyModuleState::ResetImportCache() {
import_cache.reset();
}

} // namespace duckdb
3 changes: 3 additions & 0 deletions src/duckdb_py/native/python_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,9 @@ void TransformPythonObjectInternal(py::handle ele, A &result, const B &param, bo
break;
}
if (conversion_target.id() == LogicalTypeId::UBIGINT) {
if (PyErr_Occurred()) {
PyErr_Clear();
}
throw InvalidInputException("Python Conversion Failure: Value out of range for type %s",
conversion_target);
}
Expand Down
Loading
Loading