From fe426123d83256804bd698be5ce680c272698ba6 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 13:20:32 +0100 Subject: [PATCH 01/19] Use nanobind for the python module --- CMakeLists.txt | 4 +- cmake/external_dependencies.cmake | 12 +- include/sparrow-pycapsule/pycapsule.hpp | 59 +--- .../sparrow_array_python_class.hpp | 117 +++---- src/pycapsule.cpp | 191 +++-------- src/sparrow_array_python_class.cpp | 261 +------------- test/CMakeLists.txt | 11 +- test/sparrow_helpers.py | 25 +- test/test_pycapsule.cpp | 320 ++++-------------- test/test_sparrow_helper_module.cpp | 148 ++++---- test/test_sparrow_integration.py | 18 +- 11 files changed, 302 insertions(+), 864 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32bb7dd..cc4ccf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,8 +174,8 @@ add_library(sparrow-pycapsule ${SPARROW_PYCAPSULE_LIBRARY_TYPE} ${SPARROW_PYCAPS target_link_libraries(sparrow-pycapsule PUBLIC - Python::Python - sparrow::sparrow) + sparrow::sparrow + Python::Python) target_compile_definitions(sparrow-pycapsule PUBLIC ${SPARROW_PYCAPSULE_COMPILE_DEFINITIONS}) diff --git a/cmake/external_dependencies.cmake b/cmake/external_dependencies.cmake index d71d82a..6d1de08 100644 --- a/cmake/external_dependencies.cmake +++ b/cmake/external_dependencies.cmake @@ -64,4 +64,14 @@ if(SPARROW_PYCAPSULE_BUILD_TESTS) ) endif() -find_package(Python REQUIRED COMPONENTS Development) +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module Development.Embed) + +find_package_or_fetch( + PACKAGE_NAME nanobind + GIT_REPOSITORY https://github.com/wjakob/nanobind.git + TAG v2.9.2 +) + +if(TARGET nanobind) + nanobind_build_library(nanobind-static) +endif() diff --git a/include/sparrow-pycapsule/pycapsule.hpp b/include/sparrow-pycapsule/pycapsule.hpp index 9002ab9..e3dc5fc 100644 --- a/include/sparrow-pycapsule/pycapsule.hpp +++ b/include/sparrow-pycapsule/pycapsule.hpp @@ -2,6 +2,7 @@ #include +#define PY_SSIZE_T_CLEAN 
#include #include @@ -16,64 +17,6 @@ struct ArrowArray; namespace sparrow::pycapsule { - /** - * @brief Capsule destructor for ArrowSchema PyCapsules. - * - * Calls the schema's release callback if not null, then frees the schema. - * This is used as the PyCapsule destructor to ensure proper cleanup. - * - * @param capsule The PyCapsule containing an ArrowSchema pointer - */ - SPARROW_PYCAPSULE_API void release_arrow_schema_pycapsule(PyObject* capsule); - - /** - * @brief Exports a sparrow array's schema to a PyCapsule. - * - * Creates a new ArrowSchema on the heap and transfers ownership from the array. - * The array is moved from and becomes invalid after this call. - * - * @param arr The sparrow array to export (will be moved from) - * @return A new PyCapsule containing the ArrowSchema, or nullptr on error - */ - SPARROW_PYCAPSULE_API PyObject* export_arrow_schema_pycapsule(array& arr); - - /** - * @brief Retrieves the ArrowSchema pointer from a PyCapsule. - * - * @param capsule The PyCapsule to extract the schema from - * @return Pointer to the ArrowSchema, or nullptr if the capsule is invalid (sets Python exception) - */ - SPARROW_PYCAPSULE_API ArrowSchema* get_arrow_schema_pycapsule(PyObject* capsule); - - /** - * @brief Capsule destructor for ArrowArray PyCapsules. - * - * Calls the array's release callback if not null, then frees the array. - * This is used as the PyCapsule destructor to ensure proper cleanup. - * - * @param capsule The PyCapsule containing an ArrowArray pointer - */ - SPARROW_PYCAPSULE_API void release_arrow_array_pycapsule(PyObject* capsule); - - /** - * @brief Exports a sparrow array's data to a PyCapsule. - * - * Creates a new ArrowArray on the heap and transfers ownership from the array. - * The array is moved from and becomes invalid after this call. 
- * - * @param arr The sparrow array to export (will be moved from) - * @return A new PyCapsule containing the ArrowArray, or nullptr on error - */ - SPARROW_PYCAPSULE_API PyObject* export_arrow_array_pycapsule(array& arr); - - /** - * @brief Retrieves the ArrowArray pointer from a PyCapsule. - * - * @param capsule The PyCapsule to extract the array from - * @return Pointer to the ArrowArray, or nullptr if the capsule is invalid (sets Python exception) - */ - SPARROW_PYCAPSULE_API ArrowArray* get_arrow_array_pycapsule(PyObject* capsule); - /** * @brief Imports a sparrow array from schema and array PyCapsules. * diff --git a/include/sparrow-pycapsule/sparrow_array_python_class.hpp b/include/sparrow-pycapsule/sparrow_array_python_class.hpp index 3418c77..bd3aba8 100644 --- a/include/sparrow-pycapsule/sparrow_array_python_class.hpp +++ b/include/sparrow-pycapsule/sparrow_array_python_class.hpp @@ -11,87 +11,56 @@ namespace sparrow::pycapsule { /** - * @brief Python object structure for SparrowArray. + * @brief C++ wrapper class for sparrow::array with Python interop. * - * This structure holds a pointer to a sparrow::array. The pointer is used - * to avoid issues with C++ objects in C-style Python object structures. + * This class wraps a sparrow::array and provides methods for Arrow PyCapsule + * Interface (ArrowArrayExportable protocol), allowing it to be passed + * directly to libraries like Polars via pl.from_arrow(). + * + * Note: This class is designed to be wrapped by nanobind (or similar) + * in a Python extension module. */ - struct SparrowArrayObject + class SPARROW_PYCAPSULE_API SparrowArray { - PyObject_HEAD sparrow::array* arr; - }; + public: + /** + * @brief Construct a SparrowArray by importing from PyCapsules. + * + * @param schema_capsule PyCapsule containing an ArrowSchema. + * @param array_capsule PyCapsule containing an ArrowArray. 
+ */ + SparrowArray(PyObject* schema_capsule, PyObject* array_capsule); - /** - * @brief Deallocator for SparrowArray Python objects. - */ - SPARROW_PYCAPSULE_API void SparrowArray_dealloc(SparrowArrayObject* self); + /** + * @brief Construct a SparrowArray from an existing sparrow::array. + * + * @param arr The sparrow array to wrap (will be moved). + */ + explicit SparrowArray(sparrow::array&& arr); - /** - * @brief Implementation of __arrow_c_array__ method. - * - * This method exports the wrapped sparrow array as Arrow PyCapsules, - * implementing the Arrow PyCapsule Interface (ArrowArrayExportable protocol). - * - * @param self The SparrowArray object. - * @param args Positional arguments (unused). - * @param kwargs Keyword arguments (optional requested_schema). - * @return A tuple of (schema_capsule, array_capsule). - */ - SPARROW_PYCAPSULE_API PyObject* - SparrowArray_arrow_c_array(SparrowArrayObject* self, PyObject* args, PyObject* kwargs); + /** + * @brief Export the array via the Arrow PyCapsule interface. + * + * @return A pair of (schema_capsule, array_capsule). Caller owns the references. + */ + std::pair export_to_capsules() const; - /** - * @brief Get the size of the wrapped array. - * - * @param self The SparrowArray object. - * @param args Positional arguments (unused). - * @return The size of the array as a Python integer. - */ - SPARROW_PYCAPSULE_API PyObject* SparrowArray_size(SparrowArrayObject* self, PyObject* args); + /** + * @brief Get the number of elements in the array. + * + * @return The size of the array. + */ + size_t size() const; - /** - * @brief Get the Python type object for SparrowArray. - * - * This function returns a pointer to the SparrowArrayType. The type is - * initialized on first call if necessary. - * - * @return Pointer to the SparrowArrayType, or nullptr on error. - */ - SPARROW_PYCAPSULE_API PyTypeObject* get_sparrow_array_type(); + /** + * @brief Get a const reference to the underlying sparrow array. 
+ * + * @return The wrapped sparrow array. + */ + const sparrow::array& get_array() const; - /** - * @brief Create a new SparrowArray Python object from a sparrow::array. - * - * This function creates a new Python object that wraps the given sparrow array. - * The array is moved into the Python object, so the caller should not use it - * after this call. - * - * @param arr The sparrow array to wrap (will be moved). - * @return A new reference to a SparrowArray Python object, or nullptr on error. - */ - SPARROW_PYCAPSULE_API PyObject* create_sparrow_array_object(sparrow::array&& arr); - - /** - * @brief Create a new SparrowArray Python object from PyCapsules. - * - * This function creates a new Python object by importing from existing - * Arrow PyCapsules. - * - * @param schema_capsule The schema PyCapsule. - * @param array_capsule The array PyCapsule. - * @return A new reference to a SparrowArray Python object, or nullptr on error. - */ - SPARROW_PYCAPSULE_API PyObject* - create_sparrow_array_object_from_capsules(PyObject* schema_capsule, PyObject* array_capsule); - - /** - * @brief Register the SparrowArray type with a Python module. - * - * This function adds the SparrowArray type to the given module. - * - * @param module The Python module to add the type to. - * @return 0 on success, -1 on error. 
- */ - SPARROW_PYCAPSULE_API int register_sparrow_array_type(PyObject* module); + private: + sparrow::array m_array; + }; } // namespace sparrow::pycapsule diff --git a/src/pycapsule.cpp b/src/pycapsule.cpp index 989540f..28aac08 100644 --- a/src/pycapsule.cpp +++ b/src/pycapsule.cpp @@ -11,106 +11,63 @@ namespace sparrow::pycapsule namespace { // Internal capsule name constants - constexpr std::string_view arrow_schema_str = "arrow_schema"; - constexpr std::string_view arrow_array_str = "arrow_array"; - } - - void release_arrow_schema_pycapsule(PyObject* capsule) - { - if (capsule == nullptr) - { - return; - } - auto schema = static_cast(PyCapsule_GetPointer(capsule, arrow_schema_str.data())); - if (schema == nullptr) - { - return; - } - if (schema->release != nullptr) - { - schema->release(schema); - } - delete schema; - } - - PyObject* export_arrow_schema_pycapsule(array& arr) - { - // Allocate a new ArrowSchema on the heap and extract (move) the schema - ArrowSchema* arrow_schema_ptr = new ArrowSchema(); - *arrow_schema_ptr = extract_arrow_schema(std::move(arr)); - - PyObject* capsule_ptr = PyCapsule_New( - arrow_schema_ptr, - arrow_schema_str.data(), - release_arrow_schema_pycapsule - ); - if (capsule_ptr == nullptr) - { - arrow_schema_ptr->release(arrow_schema_ptr); - delete arrow_schema_ptr; - return nullptr; - } - return capsule_ptr; - } + constexpr const char* arrow_schema_str = "arrow_schema"; + constexpr const char* arrow_array_str = "arrow_array"; - ArrowSchema* get_arrow_schema_pycapsule(PyObject* capsule) - { - return static_cast(PyCapsule_GetPointer(capsule, arrow_schema_str.data())); - } - - void release_arrow_array_pycapsule(PyObject* capsule) - { - if (capsule == nullptr) + // Capsule destructor for ArrowSchema + void release_arrow_schema_pycapsule(PyObject* capsule) { - return; - } - auto array = static_cast(PyCapsule_GetPointer(capsule, arrow_array_str.data())); - if (array == nullptr) - { - return; - } - if (array->release != nullptr) - { - 
array->release(array); + if (capsule == nullptr) + { + return; + } + auto* schema = static_cast(PyCapsule_GetPointer(capsule, arrow_schema_str)); + if (schema == nullptr) + { + return; + } + if (schema->release != nullptr) + { + schema->release(schema); + } + delete schema; } - delete array; - } - PyObject* export_arrow_array_pycapsule(array& arr) - { - // Allocate a new ArrowArray on the heap and extract (move) the array - ArrowArray* arrow_array_ptr = new ArrowArray(); - *arrow_array_ptr = extract_arrow_array(std::move(arr)); - - PyObject* capsule_ptr = PyCapsule_New( - arrow_array_ptr, - arrow_array_str.data(), - release_arrow_array_pycapsule - ); - if (capsule_ptr == nullptr) + // Capsule destructor for ArrowArray + void release_arrow_array_pycapsule(PyObject* capsule) { - arrow_array_ptr->release(arrow_array_ptr); - delete arrow_array_ptr; - return nullptr; + if (capsule == nullptr) + { + return; + } + auto* array = static_cast(PyCapsule_GetPointer(capsule, arrow_array_str)); + if (array == nullptr) + { + return; + } + if (array->release != nullptr) + { + array->release(array); + } + delete array; } - return capsule_ptr; - } - - ArrowArray* get_arrow_array_pycapsule(PyObject* capsule) - { - return static_cast(PyCapsule_GetPointer(capsule, arrow_array_str.data())); } array import_array_from_capsules(PyObject* schema_capsule, PyObject* array_capsule) { - ArrowSchema* schema = get_arrow_schema_pycapsule(schema_capsule); + // Get the raw pointers from the capsules + ArrowSchema* schema = static_cast( + PyCapsule_GetPointer(schema_capsule, arrow_schema_str) + ); if (schema == nullptr) { // Error already set by PyCapsule_GetPointer return array{}; } - ArrowArray* arr = get_arrow_array_pycapsule(array_capsule); + ArrowArray* arr = static_cast( + PyCapsule_GetPointer(array_capsule, arrow_array_str) + ); if (arr == nullptr) { // Error already set by PyCapsule_GetPointer @@ -118,8 +75,6 @@ namespace sparrow::pycapsule } // Move the data from the capsule structures - // 
The capsule destructors will still be called, but they will see - // that release is null and won't do anything ArrowSchema schema_moved = *schema; ArrowArray array_moved = *arr; @@ -139,77 +94,41 @@ namespace sparrow::pycapsule ArrowSchema* schema_ptr = new ArrowSchema(std::move(arrow_schema)); ArrowArray* array_ptr = new ArrowArray(std::move(arrow_array)); - // Check if Python is initialized before creating capsules - if (!Py_IsInitialized()) - { - delete schema_ptr; - delete array_ptr; - throw std::runtime_error("Python is not initialized. Cannot create PyCapsules."); - } - PyObject* schema_capsule = PyCapsule_New( schema_ptr, - arrow_schema_str.data(), + arrow_schema_str, release_arrow_schema_pycapsule ); - if (!schema_capsule) + if (schema_capsule == nullptr) { - // Check for Python error - if (PyErr_Occurred()) + if (schema_ptr->release != nullptr) { - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - - PyObject* str_value = PyObject_Str(value); - const char* error_msg = str_value ? 
PyUnicode_AsUTF8(str_value) : "Unknown error"; - - std::string error_str = std::string("PyCapsule_New failed for schema: ") + error_msg; - - Py_XDECREF(str_value); - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(traceback); - - delete schema_ptr; - delete array_ptr; - throw std::runtime_error(error_str); + schema_ptr->release(schema_ptr); } delete schema_ptr; + if (array_ptr->release != nullptr) + { + array_ptr->release(array_ptr); + } delete array_ptr; return {nullptr, nullptr}; } PyObject* array_capsule = PyCapsule_New( array_ptr, - arrow_array_str.data(), + arrow_array_str, release_arrow_array_pycapsule ); - - if (!array_capsule) + + if (array_capsule == nullptr) { - // Check for Python error - if (PyErr_Occurred()) + Py_DECREF(schema_capsule); + if (array_ptr->release != nullptr) { - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - - PyObject* str_value = PyObject_Str(value); - const char* error_msg = str_value ? PyUnicode_AsUTF8(str_value) : "Unknown error"; - - std::string error_str = std::string("PyCapsule_New failed for array: ") + error_msg; - - Py_XDECREF(str_value); - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(traceback); - - delete array_ptr; - Py_DECREF(schema_capsule); - throw std::runtime_error(error_str); + array_ptr->release(array_ptr); } delete array_ptr; - Py_DECREF(schema_capsule); return {nullptr, nullptr}; } diff --git a/src/sparrow_array_python_class.cpp b/src/sparrow_array_python_class.cpp index e5d07be..0bfa063 100644 --- a/src/sparrow_array_python_class.cpp +++ b/src/sparrow_array_python_class.cpp @@ -1,271 +1,34 @@ #include "sparrow-pycapsule/sparrow_array_python_class.hpp" -#include #include namespace sparrow::pycapsule { - void SparrowArray_dealloc(SparrowArrayObject* self) + SparrowArray::SparrowArray(PyObject* schema_capsule, PyObject* array_capsule) + : m_array(import_array_from_capsules(schema_capsule, array_capsule)) { - delete self->arr; - self->arr = nullptr; - 
Py_TYPE(self)->tp_free(reinterpret_cast(self)); } - PyObject* SparrowArray_arrow_c_array(SparrowArrayObject* self, PyObject* args, PyObject* kwargs) + SparrowArray::SparrowArray(sparrow::array&& arr) + : m_array(std::move(arr)) { - static const char* kwlist[] = {"requested_schema", nullptr}; - PyObject* requested_schema = nullptr; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", const_cast(kwlist), &requested_schema)) - { - return nullptr; - } - - // requested_schema is typically ignored for simple cases - // In a full implementation, you might use it to cast to a different type - (void) requested_schema; - - if (self->arr == nullptr) - { - PyErr_SetString(PyExc_ValueError, "SparrowArray contains no data"); - return nullptr; - } - - try - { - auto [schema_capsule, array_capsule] = export_array_to_capsules(*self->arr); - - if (schema_capsule == nullptr || array_capsule == nullptr) - { - Py_XDECREF(schema_capsule); - Py_XDECREF(array_capsule); - PyErr_SetString(PyExc_RuntimeError, "Failed to create Arrow PyCapsules"); - return nullptr; - } - - PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule); - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); - return result; - } - catch (const std::exception& e) - { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } } - PyObject* SparrowArray_size(SparrowArrayObject* self, [[maybe_unused]] PyObject* args) + std::pair SparrowArray::export_to_capsules() const { - if (self->arr == nullptr) - { - PyErr_SetString(PyExc_ValueError, "SparrowArray contains no data"); - return nullptr; - } - - return PyLong_FromSize_t(self->arr->size()); + // We need a non-const copy since export moves from the array + sparrow::array arr_copy = m_array; + return export_array_to_capsules(arr_copy); } - /** - * @brief Constructor for SparrowArray. - * - * Accepts an object implementing __arrow_c_array__ and imports it. 
- */ - static PyObject* SparrowArray_new(PyTypeObject* type, PyObject* args, PyObject* kwargs) + size_t SparrowArray::size() const { - static const char* kwlist[] = {"arrow_array", nullptr}; - PyObject* arrow_array = nullptr; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", const_cast(kwlist), &arrow_array)) - { - return nullptr; - } - - // Get __arrow_c_array__ method from the input object - PyObject* arrow_c_array_method = PyObject_GetAttrString(arrow_array, "__arrow_c_array__"); - if (arrow_c_array_method == nullptr) - { - PyErr_SetString( - PyExc_TypeError, - "Input object must implement __arrow_c_array__ (ArrowArrayExportable protocol)" - ); - return nullptr; - } - - // Call __arrow_c_array__() to get the capsules - PyObject* capsules = PyObject_CallNoArgs(arrow_c_array_method); - Py_DECREF(arrow_c_array_method); - - if (capsules == nullptr) - { - return nullptr; - } - - // Unpack the tuple (schema_capsule, array_capsule) - if (!PyTuple_Check(capsules) || PyTuple_Size(capsules) != 2) - { - Py_DECREF(capsules); - PyErr_SetString(PyExc_TypeError, "__arrow_c_array__ must return a tuple of 2 elements"); - return nullptr; - } - - PyObject* schema_capsule = PyTuple_GetItem(capsules, 0); - PyObject* array_capsule = PyTuple_GetItem(capsules, 1); - - try - { - sparrow::array arr = import_array_from_capsules(schema_capsule, array_capsule); - Py_DECREF(capsules); - - // Allocate the object - SparrowArrayObject* self = reinterpret_cast(type->tp_alloc(type, 0)); - if (self == nullptr) - { - return nullptr; - } - - self->arr = new sparrow::array(std::move(arr)); - return reinterpret_cast(self); - } - catch (const std::bad_alloc&) - { - Py_DECREF(capsules); - PyErr_NoMemory(); - return nullptr; - } - catch (const std::exception& e) - { - Py_DECREF(capsules); - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } + return m_array.size(); } - static PyMethodDef SparrowArray_methods[] = { - {"__arrow_c_array__", - 
reinterpret_cast(SparrowArray_arrow_c_array), - METH_VARARGS | METH_KEYWORDS, - "Export the array via the Arrow PyCapsule interface.\n\n" - "Parameters\n" - "----------\n" - "requested_schema : object, optional\n" - " Requested schema for the output (typically ignored).\n\n" - "Returns\n" - "-------\n" - "tuple[object, object]\n" - " A tuple of (schema_capsule, array_capsule)."}, - {"size", - reinterpret_cast(SparrowArray_size), - METH_NOARGS, - "Get the number of elements in the array.\n\n" - "Returns\n" - "-------\n" - "int\n" - " The size of the array."}, - {nullptr, nullptr, 0, nullptr} // Sentinel - }; - - // The type object - defined as a static variable - static PyTypeObject SparrowArrayType = { - .ob_base = PyVarObject_HEAD_INIT(nullptr, 0).tp_name = "sparrow.SparrowArray", - .tp_basicsize = sizeof(SparrowArrayObject), - .tp_itemsize = 0, - .tp_dealloc = reinterpret_cast(SparrowArray_dealloc), - .tp_flags = Py_TPFLAGS_DEFAULT, - .tp_doc = PyDoc_STR( - "SparrowArray(arrow_array) - Arrow array wrapper implementing __arrow_c_array__.\n\n" - "This class wraps a sparrow array and implements the Arrow PyCapsule\n" - "Interface (ArrowArrayExportable protocol), allowing it to be passed\n" - "directly to libraries like Polars via pl.from_arrow().\n\n" - "Parameters\n" - "----------\n" - "arrow_array : ArrowArrayExportable\n" - " An object implementing __arrow_c_array__ (e.g., PyArrow array)." 
- ), - .tp_methods = SparrowArray_methods, - .tp_new = SparrowArray_new, - }; - - static bool type_initialized = false; - - PyTypeObject* get_sparrow_array_type() - { - if (!type_initialized) - { - if (PyType_Ready(&SparrowArrayType) < 0) - { - return nullptr; - } - type_initialized = true; - } - return &SparrowArrayType; - } - - PyObject* create_sparrow_array_object(sparrow::array&& arr) - { - PyTypeObject* type = get_sparrow_array_type(); - if (type == nullptr) - { - return nullptr; - } - - SparrowArrayObject* obj = PyObject_New(SparrowArrayObject, type); - if (obj == nullptr) - { - return nullptr; - } - - try - { - obj->arr = new sparrow::array(std::move(arr)); - } - catch (const std::bad_alloc&) - { - Py_DECREF(obj); - PyErr_NoMemory(); - return nullptr; - } - catch (const std::exception& e) - { - Py_DECREF(obj); - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } - - return reinterpret_cast(obj); - } - - PyObject* create_sparrow_array_object_from_capsules(PyObject* schema_capsule, PyObject* array_capsule) - { - try - { - sparrow::array arr = import_array_from_capsules(schema_capsule, array_capsule); - return create_sparrow_array_object(std::move(arr)); - } - catch (const std::exception& e) - { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } - } - - int register_sparrow_array_type(PyObject* module) + const sparrow::array& SparrowArray::get_array() const { - PyTypeObject* type = get_sparrow_array_type(); - if (type == nullptr) - { - return -1; - } - - Py_INCREF(type); - if (PyModule_AddObject(module, "SparrowArray", reinterpret_cast(type)) < 0) - { - Py_DECREF(type); - return -1; - } - - return 0; + return m_array; } } // namespace sparrow::pycapsule diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 50435b5..49910e8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,7 +21,6 @@ target_link_libraries(${test_target} sparrow-pycapsule sparrow::sparrow doctest::doctest - Python::Python ) if(MSVC) @@ 
-85,7 +84,7 @@ add_custom_target(run_tests_with_junit_report set_target_properties(run_tests_with_junit_report PROPERTIES FOLDER "Tests utilities") -Python_add_library(test_sparrow_helper MODULE test_sparrow_helper_module.cpp) +nanobind_add_module(test_sparrow_helper test_sparrow_helper_module.cpp) target_link_libraries(test_sparrow_helper PRIVATE @@ -95,17 +94,9 @@ target_link_libraries(test_sparrow_helper target_compile_features(test_sparrow_helper PRIVATE cxx_std_20) -if(MSVC) - target_compile_options(test_sparrow_helper PRIVATE /W4) -else() - target_compile_options(test_sparrow_helper PRIVATE -Wall -Wextra -Wpedantic) -endif() - set_target_properties(test_sparrow_helper PROPERTIES FOLDER tests LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} - # Python modules must not have a debug suffix - Python won't find them - DEBUG_POSTFIX "" ) # Python integration test diff --git a/test/sparrow_helpers.py b/test/sparrow_helpers.py index 24d64b6..faa5b8e 100644 --- a/test/sparrow_helpers.py +++ b/test/sparrow_helpers.py @@ -34,14 +34,31 @@ class SparrowArrayType(ArrowArrayExportable, Protocol): def size(self) -> int: """Get the number of elements in the array.""" ... + + @classmethod + def from_arrow(cls, arrow_array: ArrowArrayExportable) -> "SparrowArrayType": + """Create a SparrowArray from an Arrow-compatible object.""" + ... 
def _setup_module_path() -> None: """Add the build directory to Python path so we can import test_sparrow_helper.""" - # Check for environment variable first - helper_path = os.environ.get('TEST_SPARROW_HELPER_PATH') + import importlib.util + + # Check for environment variable first (can be either LIB_PATH or PATH variant) + helper_path = os.environ.get('TEST_SPARROW_HELPER_LIB_PATH') or os.environ.get('TEST_SPARROW_HELPER_PATH') if helper_path: - module_dir = Path(helper_path).parent + helper_file = Path(helper_path) + if helper_file.exists(): + # Load module directly from the given path + spec = importlib.util.spec_from_file_location("test_sparrow_helper", helper_file) + if spec and spec.loader: + module = importlib.util.module_from_spec(spec) + sys.modules["test_sparrow_helper"] = module + spec.loader.exec_module(module) + return + # Also try adding the parent directory to path + module_dir = helper_file.parent if module_dir.exists(): sys.path.insert(0, str(module_dir)) return @@ -61,7 +78,7 @@ def _setup_module_path() -> None: raise ImportError( "Could not find test_sparrow_helper module. " - "Build the project first or set TEST_SPARROW_HELPER_PATH." + "Build the project first or set TEST_SPARROW_HELPER_LIB_PATH." 
) diff --git a/test/test_pycapsule.cpp b/test/test_pycapsule.cpp index 4419607..fb19728 100644 --- a/test/test_pycapsule.cpp +++ b/test/test_pycapsule.cpp @@ -47,215 +47,56 @@ namespace sparrow::pycapsule PythonInitializer& operator=(PythonInitializer&&) = delete; }; - // RAII wrapper for PyObject* - struct PyObjectDeleter + // RAII wrapper for managing PyObject* references + struct PyObjectGuard { - void operator()(PyObject* obj) const + PyObject* ptr; + + explicit PyObjectGuard(PyObject* p) + : ptr(p) { - if (obj != nullptr) - { - Py_DECREF(obj); - } } - }; - - using PyObjectPtr = std::unique_ptr; - TEST_SUITE("pycapsule") - { - TEST_CASE("ExportArrowSchemaPyCapsule") + ~PyObjectGuard() { - PythonInitializer py_init; - - SUBCASE("creates_valid_capsule") - { - auto arr = make_test_array(); - // Note: export_arrow_schema_pycapsule moves from arr, so arr becomes invalid after - PyObject* schema_capsule = export_arrow_schema_pycapsule(arr); - - REQUIRE_NE(schema_capsule, nullptr); - CHECK(PyCapsule_CheckExact(schema_capsule)); - - const char* name = PyCapsule_GetName(schema_capsule); - REQUIRE_NE(name, nullptr); - CHECK_EQ(std::string(name), "arrow_schema"); - - // Verify we can get the pointer - ArrowSchema* schema = static_cast( - PyCapsule_GetPointer(schema_capsule, "arrow_schema") - ); - CHECK_NE(schema, nullptr); - CHECK_NE(schema->release, nullptr); - - Py_DECREF(schema_capsule); - } - - SUBCASE("capsule_has_destructor") - { - auto arr = make_test_array(); - PyObject* schema_capsule = export_arrow_schema_pycapsule(arr); - - REQUIRE_NE(schema_capsule, nullptr); - - // Get the schema pointer before destruction - ArrowSchema* schema = static_cast( - PyCapsule_GetPointer(schema_capsule, "arrow_schema") - ); - REQUIRE_NE(schema, nullptr); + Py_XDECREF(ptr); + } - // The destructor should be set - PyCapsule_Destructor destructor = PyCapsule_GetDestructor(schema_capsule); - CHECK_NE(destructor, nullptr); + PyObjectGuard(const PyObjectGuard&) = delete; + PyObjectGuard& 
operator=(const PyObjectGuard&) = delete; - // Decref will call the destructor - Py_DECREF(schema_capsule); - } + PyObjectGuard(PyObjectGuard&& other) noexcept + : ptr(other.ptr) + { + other.ptr = nullptr; } - TEST_CASE("ExportArrowArrayPyCapsule") + PyObjectGuard& operator=(PyObjectGuard&& other) noexcept { - PythonInitializer py_init; - - SUBCASE("creates_valid_capsule") + if (this != &other) { - auto arr = make_test_array(); - PyObject* array_capsule = export_arrow_array_pycapsule(arr); - - REQUIRE_NE(array_capsule, nullptr); - CHECK(PyCapsule_CheckExact(array_capsule)); - - const char* name = PyCapsule_GetName(array_capsule); - REQUIRE_NE(name, nullptr); - CHECK_EQ(std::string(name), "arrow_array"); - - // Verify we can get the pointer - ArrowArray* array = static_cast(PyCapsule_GetPointer(array_capsule, "arrow_array")); - CHECK_NE(array, nullptr); - CHECK_NE(array->release, nullptr); - - Py_DECREF(array_capsule); - } - - SUBCASE("capsule_has_destructor") - { - auto arr = make_test_array(); - PyObject* array_capsule = export_arrow_array_pycapsule(arr); - - REQUIRE_NE(array_capsule, nullptr); - - // Get the array pointer before destruction - ArrowArray* array = static_cast(PyCapsule_GetPointer(array_capsule, "arrow_array")); - REQUIRE_NE(array, nullptr); - - // The destructor should be set - PyCapsule_Destructor destructor = PyCapsule_GetDestructor(array_capsule); - CHECK_NE(destructor, nullptr); - - // Decref will call the destructor - Py_DECREF(array_capsule); - } - - SUBCASE("array_has_correct_length") - { - auto arr = make_test_array(); - PyObject* array_capsule = export_arrow_array_pycapsule(arr); - - REQUIRE_NE(array_capsule, nullptr); - - ArrowArray* array = static_cast(PyCapsule_GetPointer(array_capsule, "arrow_array")); - REQUIRE_NE(array, nullptr); - CHECK_EQ(array->length, 5); - - Py_DECREF(array_capsule); + Py_XDECREF(ptr); + ptr = other.ptr; + other.ptr = nullptr; } + return *this; } - TEST_CASE("GetArrowSchemaPyCapsule") + PyObject* get() const { - 
PythonInitializer py_init; - - SUBCASE("returns_valid_schema_pointer") - { - auto arr = make_test_array(); - PyObject* schema_capsule = export_arrow_schema_pycapsule(arr); - - ArrowSchema* schema = get_arrow_schema_pycapsule(schema_capsule); - CHECK_NE(schema, nullptr); - CHECK_NE(schema->release, nullptr); - - Py_DECREF(schema_capsule); - } - - SUBCASE("returns_null_for_wrong_capsule_name") - { - // Create a capsule with wrong name - int dummy = 42; - PyObject* wrong_capsule = PyCapsule_New(&dummy, "wrong_name", nullptr); - - ArrowSchema* schema = get_arrow_schema_pycapsule(wrong_capsule); - CHECK_EQ(schema, nullptr); - CHECK_NE(PyErr_Occurred(), nullptr); - PyErr_Clear(); - - Py_DECREF(wrong_capsule); - } - - SUBCASE("returns_null_for_non_capsule") - { - PyObject* not_capsule = PyLong_FromLong(42); - - ArrowSchema* schema = get_arrow_schema_pycapsule(not_capsule); - CHECK_EQ(schema, nullptr); - CHECK_NE(PyErr_Occurred(), nullptr); - PyErr_Clear(); - - Py_DECREF(not_capsule); - } + return ptr; } - TEST_CASE("GetArrowArrayPyCapsule") + PyObject* release() { - PythonInitializer py_init; - - SUBCASE("returns_valid_array_pointer") - { - auto arr = make_test_array(); - PyObject* array_capsule = export_arrow_array_pycapsule(arr); - - ArrowArray* array = get_arrow_array_pycapsule(array_capsule); - CHECK_NE(array, nullptr); - CHECK_NE(array->release, nullptr); - - Py_DECREF(array_capsule); - } - - SUBCASE("returns_null_for_wrong_capsule_name") - { - // Create a capsule with wrong name - int dummy = 42; - PyObject* wrong_capsule = PyCapsule_New(&dummy, "wrong_name", nullptr); - - ArrowArray* array = get_arrow_array_pycapsule(wrong_capsule); - CHECK_EQ(array, nullptr); - CHECK_NE(PyErr_Occurred(), nullptr); - PyErr_Clear(); - - Py_DECREF(wrong_capsule); - } - - SUBCASE("returns_null_for_non_capsule") - { - PyObject* not_capsule = PyLong_FromLong(42); - - ArrowArray* array = get_arrow_array_pycapsule(not_capsule); - CHECK_EQ(array, nullptr); - CHECK_NE(PyErr_Occurred(), 
nullptr); - PyErr_Clear(); - - Py_DECREF(not_capsule); - } + PyObject* p = ptr; + ptr = nullptr; + return p; } + }; + TEST_SUITE("pycapsule") + { TEST_CASE("export_array_to_capsules") { PythonInitializer py_init; @@ -265,6 +106,9 @@ namespace sparrow::pycapsule auto arr = make_test_array(); auto [schema_capsule, array_capsule] = export_array_to_capsules(arr); + PyObjectGuard schema_guard(schema_capsule); + PyObjectGuard array_guard(array_capsule); + REQUIRE_NE(schema_capsule, nullptr); REQUIRE_NE(array_capsule, nullptr); @@ -273,9 +117,6 @@ namespace sparrow::pycapsule CHECK_EQ(std::string(PyCapsule_GetName(schema_capsule)), "arrow_schema"); CHECK_EQ(std::string(PyCapsule_GetName(array_capsule)), "arrow_array"); - - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); } SUBCASE("exported_capsules_contain_valid_data") @@ -283,8 +124,15 @@ namespace sparrow::pycapsule auto arr = make_test_array(); auto [schema_capsule, array_capsule] = export_array_to_capsules(arr); - ArrowSchema* schema = get_arrow_schema_pycapsule(schema_capsule); - ArrowArray* array = get_arrow_array_pycapsule(array_capsule); + PyObjectGuard schema_guard(schema_capsule); + PyObjectGuard array_guard(array_capsule); + + ArrowSchema* schema = static_cast( + PyCapsule_GetPointer(schema_capsule, "arrow_schema") + ); + ArrowArray* array = static_cast( + PyCapsule_GetPointer(array_capsule, "arrow_array") + ); REQUIRE_NE(schema, nullptr); REQUIRE_NE(array, nullptr); @@ -292,9 +140,6 @@ namespace sparrow::pycapsule CHECK_NE(schema->release, nullptr); CHECK_NE(array->release, nullptr); CHECK_EQ(array->length, 5); - - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); } } @@ -308,6 +153,9 @@ namespace sparrow::pycapsule auto original_arr = make_test_array(); auto [schema_capsule, array_capsule] = export_array_to_capsules(original_arr); + PyObjectGuard schema_guard(schema_capsule); + PyObjectGuard array_guard(array_capsule); + // Import it back auto imported_arr = 
import_array_from_capsules(schema_capsule, array_capsule); @@ -318,24 +166,23 @@ namespace sparrow::pycapsule ArrowSchema* schema = static_cast( PyCapsule_GetPointer(schema_capsule, "arrow_schema") ); - ArrowArray* array = static_cast(PyCapsule_GetPointer(array_capsule, "arrow_array")); + ArrowArray* array = static_cast( + PyCapsule_GetPointer(array_capsule, "arrow_array") + ); CHECK_EQ(schema->release, nullptr); CHECK_EQ(array->release, nullptr); - - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); } - // Note: Error handling tests for invalid capsules are omitted - // as they would require more complex setup to avoid crashes - SUBCASE("ownership_transfer_is_correct") { // Export an array auto original_arr = make_test_array(); auto [schema_capsule, array_capsule] = export_array_to_capsules(original_arr); + PyObjectGuard schema_guard(schema_capsule); + PyObjectGuard array_guard(array_capsule); + // Get pointers before import ArrowSchema* schema_before = static_cast( PyCapsule_GetPointer(schema_capsule, "arrow_schema") @@ -354,10 +201,6 @@ namespace sparrow::pycapsule CHECK_EQ(schema_before->release, nullptr); CHECK_EQ(array_before->release, nullptr); - // The capsule destructors should now be no-ops - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); - // The imported array should still be valid CHECK(imported_arr.size() == 5); } @@ -386,82 +229,37 @@ namespace sparrow::pycapsule // Export (moves from original_arr) auto [schema_capsule, array_capsule] = export_array_to_capsules(original_arr); + PyObjectGuard schema_guard(schema_capsule); + PyObjectGuard array_guard(array_capsule); + // Import auto imported_arr = import_array_from_capsules(schema_capsule, array_capsule); // Verify REQUIRE_EQ(imported_arr.size(), 5); CHECK_EQ(imported_arr.size(), original_size); - - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); - } - } - - TEST_CASE("ReleaseArrowSchemaPyCapsule_handles_null_release") - { - PythonInitializer py_init; - - 
SUBCASE("destructor_handles_already_released_schema") - { - // Create a schema with null release callback - ArrowSchema* schema = new ArrowSchema{}; - schema->release = nullptr; - - PyObject* capsule = PyCapsule_New(schema, "arrow_schema", release_arrow_schema_pycapsule); - - // This should not crash - Py_DECREF(capsule); - } - } - - TEST_CASE("ReleaseArrowArrayPyCapsule_handles_null_release") - { - PythonInitializer py_init; - - SUBCASE("destructor_handles_already_released_array") - { - // Create an array with null release callback - ArrowArray* array = new ArrowArray{}; - array->release = nullptr; - - PyObject* capsule = PyCapsule_New(array, "arrow_array", release_arrow_array_pycapsule); - - // This should not crash - Py_DECREF(capsule); } } - TEST_CASE("memory_leak_prevention") + TEST_CASE("memory_management") { PythonInitializer py_init; - SUBCASE("capsule_destructor_prevents_leak_if_never_consumed") - { - // Create capsules but never consume them - auto arr = make_test_array(); - PyObject* schema_capsule = export_arrow_schema_pycapsule(arr); - PyObject* array_capsule = export_arrow_array_pycapsule(arr); - - // Just decref without consuming - destructors should clean up - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); - } - SUBCASE("imported_array_manages_memory_correctly") { { auto original_arr = make_test_array(); auto [schema_capsule, array_capsule] = export_array_to_capsules(original_arr); + PyObjectGuard schema_guard(schema_capsule); + PyObjectGuard array_guard(array_capsule); + { auto imported_arr = import_array_from_capsules(schema_capsule, array_capsule); // imported_arr goes out of scope here } - // Capsules still need cleanup - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); + // Capsules go out of scope and clean up automatically } } } diff --git a/test/test_sparrow_helper_module.cpp b/test/test_sparrow_helper_module.cpp index acb701e..c22ba5e 100644 --- a/test/test_sparrow_helper_module.cpp +++ b/test/test_sparrow_helper_module.cpp @@ 
-1,9 +1,9 @@ -#define PY_SSIZE_T_CLEAN -#include - #include #include +#include +#include + #include #include #include @@ -11,76 +11,104 @@ #include #include +namespace nb = nanobind; + /** - * Create a test array and return a SparrowArray object. - * - * Python signature: create_test_array() -> SparrowArray + * Create a test sparrow array with sample data. */ -static PyObject* py_create_test_array(PyObject* self, PyObject* args) +sparrow::array create_test_sparrow_array() { - (void)self; - (void)args; + // Create a test array with nullable integers + std::vector> values = { + sparrow::make_nullable(10, true), + sparrow::make_nullable(20, true), + sparrow::make_nullable(0, false), // null + sparrow::make_nullable(40, true), + sparrow::make_nullable(50, true) + }; + + sparrow::primitive_array prim_array(std::move(values)); + return sparrow::array(std::move(prim_array)); +} - try +/** + * Create a SparrowArray from an object implementing __arrow_c_array__. + */ +sparrow::pycapsule::SparrowArray create_sparrow_array_from_arrow(nb::object arrow_array) +{ + // Get __arrow_c_array__ method from the input object + if (!nb::hasattr(arrow_array, "__arrow_c_array__")) { - // Create a test array with nullable integers - std::vector> values = { - sparrow::make_nullable(10, true), - sparrow::make_nullable(20, true), - sparrow::make_nullable(0, false), // null - sparrow::make_nullable(40, true), - sparrow::make_nullable(50, true) - }; + throw nb::type_error( + "Input object must implement __arrow_c_array__ (ArrowArrayExportable protocol)" + ); + } - sparrow::primitive_array prim_array(std::move(values)); - sparrow::array arr(std::move(prim_array)); + // Call __arrow_c_array__() to get the capsules + nb::object capsules = arrow_array.attr("__arrow_c_array__")(); - // Return a SparrowArray object that implements __arrow_c_array__ - return sparrow::pycapsule::create_sparrow_array_object(std::move(arr)); - } - catch (const std::exception& e) + // Unpack the tuple (schema_capsule, 
array_capsule) + if (!nb::isinstance(capsules) || nb::len(capsules) != 2) { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; + throw nb::type_error("__arrow_c_array__ must return a tuple of 2 elements"); } -} -// Method definitions -static PyMethodDef TestSparrowHelperMethods[] = { - { - "create_test_array", - py_create_test_array, - METH_NOARGS, - "Create a test array and return a SparrowArray object implementing __arrow_c_array__." - }, - {nullptr, nullptr, 0, nullptr} // Sentinel -}; + nb::tuple capsule_tuple = nb::cast(capsules); + PyObject* schema_capsule = capsule_tuple[0].ptr(); + PyObject* array_capsule = capsule_tuple[1].ptr(); -// Module definition -static struct PyModuleDef test_sparrow_helper_module = { - PyModuleDef_HEAD_INIT, - "test_sparrow_helper", // Module name - "Native Python extension providing SparrowArray type for Arrow data exchange.\n" - "Higher-level helpers are available in sparrow_helpers.py.", - -1, // Module state size (-1 = no state) - TestSparrowHelperMethods -}; + return sparrow::pycapsule::SparrowArray(schema_capsule, array_capsule); +} -// Module initialization function -PyMODINIT_FUNC PyInit_test_sparrow_helper(void) +NB_MODULE(test_sparrow_helper, m) { - PyObject* module = PyModule_Create(&test_sparrow_helper_module); - if (module == nullptr) - { - return nullptr; - } + m.doc() = "Native Python extension providing SparrowArray type for Arrow data exchange.\n" + "Higher-level helpers are available in sparrow_helpers.py."; - // Register the SparrowArray type with this module - if (sparrow::pycapsule::register_sparrow_array_type(module) < 0) - { - Py_DECREF(module); - return nullptr; - } + // Define the SparrowArray class using nanobind + nb::class_(m, "SparrowArray", + "SparrowArray - Arrow array wrapper implementing __arrow_c_array__.\n\n" + "This class wraps a sparrow array and implements the Arrow PyCapsule\n" + "Interface (ArrowArrayExportable protocol), allowing it to be passed\n" + "directly to libraries 
like Polars via pl.from_arrow().\n\n" + "To create a SparrowArray from a PyArrow array, use:\n" + " sparrow_array = SparrowArray.from_arrow(pyarrow_array)") + .def_static("from_arrow", &create_sparrow_array_from_arrow, nb::arg("arrow_array"), + "Construct a SparrowArray from an Arrow-compatible object.\n\n" + "Parameters\n" + "----------\n" + "arrow_array : ArrowArrayExportable\n" + " An object implementing __arrow_c_array__ (e.g., PyArrow array).\n\n" + "Returns\n" + "-------\n" + "SparrowArray\n" + " A new SparrowArray wrapping the input data.") + .def("__arrow_c_array__", [](const sparrow::pycapsule::SparrowArray& self, nb::object /*requested_schema*/) { + auto [schema, array] = self.export_to_capsules(); + // Create a tuple and return ownership to Python + nb::object schema_obj = nb::steal(schema); + nb::object array_obj = nb::steal(array); + return nb::make_tuple(schema_obj, array_obj); + }, nb::arg("requested_schema") = nb::none(), + "Export the array via the Arrow PyCapsule interface.\n\n" + "Parameters\n" + "----------\n" + "requested_schema : object, optional\n" + " Requested schema for the output (typically ignored).\n\n" + "Returns\n" + "-------\n" + "tuple[object, object]\n" + " A tuple of (schema_capsule, array_capsule).") + .def("size", &sparrow::pycapsule::SparrowArray::size, + "Get the number of elements in the array.\n\n" + "Returns\n" + "-------\n" + "int\n" + " The size of the array."); - return module; + // Add the test helper function + m.def("create_test_array", []() { + sparrow::array arr = create_test_sparrow_array(); + return sparrow::pycapsule::SparrowArray(std::move(arr)); + }, "Create a test array and return a SparrowArray object implementing __arrow_c_array__."); } diff --git a/test/test_sparrow_integration.py b/test/test_sparrow_integration.py index a249752..9e79b57 100644 --- a/test/test_sparrow_integration.py +++ b/test/test_sparrow_integration.py @@ -86,10 +86,10 @@ def test_sparrow_array_type(self): f"Expected type 'SparrowArray', 
got '{type_name}'" ) - # Check the module-qualified name + # Check the module-qualified name (test module, not production module) full_name = f"{type(sparrow_array).__module__}.{type_name}" - assert full_name == "sparrow.SparrowArray", ( - f"Expected 'sparrow.SparrowArray', got '{full_name}'" + assert full_name == "test_sparrow_helper.SparrowArray", ( + f"Expected 'test_sparrow_helper.SparrowArray', got '{full_name}'" ) def test_sparrow_to_polars_series(self): @@ -125,12 +125,12 @@ class TestPyArrowToSparrow: """Test creating an array in PyArrow and importing to sparrow.""" def test_create_sparrow_array_from_pyarrow(self): - """Create a SparrowArray directly from a PyArrow array using the constructor.""" + """Create a SparrowArray directly from a PyArrow array using from_arrow().""" # Create a PyArrow array pa_array = pa.array([100, 200, None, 400, 500], type=pa.int32()) - # Create SparrowArray directly using the type constructor - sparrow_array = SparrowArray(pa_array) + # Create SparrowArray using the factory method + sparrow_array = SparrowArray.from_arrow(pa_array) # Verify it's a SparrowArray assert type(sparrow_array).__name__ == "SparrowArray" @@ -148,7 +148,7 @@ def test_pyarrow_to_sparrow(self): # Verify sparrow can import and read the data via __arrow_c_array__ - sparrow_array: SparrowArrayType = SparrowArray(pa_array) + sparrow_array: SparrowArrayType = SparrowArray.from_arrow(pa_array) assert sparrow_array.size() == 5 def test_pyarrow_roundtrip_through_sparrow(self): @@ -157,7 +157,7 @@ def test_pyarrow_roundtrip_through_sparrow(self): pa_array = pa.array([1, 2, None, 4, 5], type=pa.int32()) # Round-trip through sparrow (import then export as SparrowArray) - sparrow_array = SparrowArray(pa_array) + sparrow_array = SparrowArray.from_arrow(pa_array) # Import the result into Polars result_series = arrow_array_to_series(sparrow_array) @@ -175,7 +175,7 @@ def test_pyarrow_nulls_preserved_in_sparrow(self): pa_array = pa.array([None, 1, None, 3, None], 
type=pa.int32()) # Round-trip through sparrow - sparrow_array = SparrowArray(pa_array) + sparrow_array = SparrowArray.from_arrow(pa_array) # Import into Polars result_series = arrow_array_to_series(sparrow_array) From 0e7e8f1b99d3c7d1a2c8e4586027e2c0f5bfe112 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 13:53:20 +0100 Subject: [PATCH 02/19] Use nanobind for python module --- CMakeLists.txt | 18 +++ .../sparrow_array_python_class.hpp | 4 +- src/pycapsule.cpp | 7 +- src/sparrow_module.cpp | 102 ++++++++++++++ test/CMakeLists.txt | 14 +- test/sparrow_helpers.py | 61 +++++---- test/test_sparrow_helper_module.cpp | 126 +++++------------- test/test_sparrow_integration.py | 14 +- 8 files changed, 209 insertions(+), 137 deletions(-) create mode 100644 src/sparrow_module.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cc4ccf4..0668c02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -246,6 +246,24 @@ if(BUILD_DOCS) add_subdirectory(docs) endif() +# Python module +# ============= +option(SPARROW_PYCAPSULE_BUILD_PYTHON_MODULE "Build the sparrow Python module" ON) +message(STATUS "🔧 Build Python module: ${SPARROW_PYCAPSULE_BUILD_PYTHON_MODULE}") + +if(SPARROW_PYCAPSULE_BUILD_PYTHON_MODULE) + nanobind_add_module(sparrow_rockfinch src/sparrow_module.cpp) + target_link_libraries(sparrow_rockfinch PRIVATE sparrow-pycapsule sparrow::sparrow) + target_compile_features(sparrow_rockfinch PRIVATE cxx_std_20) + # Define the module name macro to match the output filename (includes debug suffix) + target_compile_definitions(sparrow_rockfinch PRIVATE + SPARROW_MODULE_NAME=$ + ) + set_target_properties(sparrow_rockfinch PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${BINARY_BUILD_DIR} + ) +endif() + # Tests # ===== if(SPARROW_PYCAPSULE_BUILD_TESTS) diff --git a/include/sparrow-pycapsule/sparrow_array_python_class.hpp b/include/sparrow-pycapsule/sparrow_array_python_class.hpp index bd3aba8..98ad618 100644 --- a/include/sparrow-pycapsule/sparrow_array_python_class.hpp 
+++ b/include/sparrow-pycapsule/sparrow_array_python_class.hpp @@ -50,14 +50,14 @@ namespace sparrow::pycapsule * * @return The size of the array. */ - size_t size() const; + [[nodiscard]] size_t size() const; /** * @brief Get a const reference to the underlying sparrow array. * * @return The wrapped sparrow array. */ - const sparrow::array& get_array() const; + [[nodiscard]] const sparrow::array& get_array() const; private: sparrow::array m_array; diff --git a/src/pycapsule.cpp b/src/pycapsule.cpp index 28aac08..46ff635 100644 --- a/src/pycapsule.cpp +++ b/src/pycapsule.cpp @@ -1,6 +1,3 @@ -#include -#include - #include #include @@ -91,8 +88,8 @@ namespace sparrow::pycapsule auto [arrow_array, arrow_schema] = extract_arrow_structures(std::move(arr)); // Allocate heap copies for the PyCapsules - ArrowSchema* schema_ptr = new ArrowSchema(std::move(arrow_schema)); - ArrowArray* array_ptr = new ArrowArray(std::move(arrow_array)); + auto* schema_ptr = new ArrowSchema(arrow_schema); + auto* array_ptr = new ArrowArray(arrow_array); PyObject* schema_capsule = PyCapsule_New( schema_ptr, diff --git a/src/sparrow_module.cpp b/src/sparrow_module.cpp new file mode 100644 index 0000000..8c6016b --- /dev/null +++ b/src/sparrow_module.cpp @@ -0,0 +1,102 @@ +/** + * @file sparrow_module.cpp + * @brief Python module definition for sparrow-pycapsule using nanobind. + * + * This file defines the "sparrow" Python extension module that exposes + * the SparrowArray class implementing the Arrow PyCapsule Interface. + */ + +#include +#include + +#include +#include + +namespace nb = nanobind; + +namespace sparrow::pycapsule +{ + /** + * @brief Create a SparrowArray from an object implementing __arrow_c_array__. 
+ */ + SparrowArray sparrow_array_from_arrow(const nb::object& arrow_array) + { + if (!nb::hasattr(arrow_array, "__arrow_c_array__")) + { + throw nb::type_error( + "Input object must implement __arrow_c_array__ (ArrowArrayExportable protocol)" + ); + } + + nb::object capsules = arrow_array.attr("__arrow_c_array__")(); + + if (!nb::isinstance(capsules) || nb::len(capsules) != 2) + { + throw nb::type_error("__arrow_c_array__ must return a tuple of 2 elements"); + } + + auto capsule_tuple = nb::cast(capsules); + PyObject* schema_capsule = capsule_tuple[0].ptr(); + PyObject* array_capsule = capsule_tuple[1].ptr(); + + return {schema_capsule, array_capsule}; + } + + /** + * @brief Export a SparrowArray to Arrow PyCapsules. + */ + nb::tuple sparrow_array_to_arrow(const SparrowArray& self, nb::object /*requested_schema*/) + { + auto [schema, array] = self.export_to_capsules(); + return nb::make_tuple(nb::steal(schema), nb::steal(array)); + } + + /** + * @brief Register the SparrowArray class with a nanobind module. 
+ */ + void register_sparrow_array(nb::module_& m) + { + nb::class_(m, "SparrowArray", + "SparrowArray - Arrow array wrapper implementing __arrow_c_array__.\n\n" + "This class wraps a sparrow array and implements the Arrow PyCapsule\n" + "Interface, allowing direct integration with libraries like Polars.\n\n" + "Example\n" + "-------\n" + ">>> import pyarrow as pa\n" + ">>> import sparrow\n" + ">>> pa_array = pa.array([1, 2, None, 4])\n" + ">>> sparrow_array = sparrow.SparrowArray.from_arrow(pa_array)") + .def_static("from_arrow", &sparrow_array_from_arrow, + nb::arg("arrow_array"), + "Create a SparrowArray from an Arrow-compatible object.\n\n" + "Parameters\n" + "----------\n" + "arrow_array : ArrowArrayExportable\n" + " An object implementing __arrow_c_array__ (e.g., PyArrow array).\n\n" + "Returns\n" + "-------\n" + "SparrowArray\n" + " A new SparrowArray wrapping the input data.") + .def("__arrow_c_array__", &sparrow_array_to_arrow, + nb::arg("requested_schema") = nb::none(), + "Export the array via the Arrow PyCapsule interface.\n\n" + "Returns\n" + "-------\n" + "tuple[object, object]\n" + " A tuple of (schema_capsule, array_capsule).") + .def("size", &SparrowArray::size, + "Get the number of elements in the array.") + .def("__len__", &SparrowArray::size); + } + +} // namespace sparrow::pycapsule + +NB_MODULE(SPARROW_MODULE_NAME, m) +{ + m.doc() = "Sparrow Rockfinch - High-performance Arrow array library for Python.\n\n" + "This module provides the SparrowArray class which implements the\n" + "Arrow PyCapsule Interface for zero-copy data exchange with other\n" + "Arrow-compatible libraries like Polars and PyArrow."; + + sparrow::pycapsule::register_sparrow_array(m); +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 49910e8..a591d7f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -94,6 +94,10 @@ target_link_libraries(test_sparrow_helper target_compile_features(test_sparrow_helper PRIVATE cxx_std_20) +# Define the module name macro 
to match the output filename (includes debug suffix) +target_compile_definitions(test_sparrow_helper PRIVATE + TEST_SPARROW_HELPER_MODULE_NAME=$ +) set_target_properties(test_sparrow_helper PROPERTIES FOLDER tests LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} @@ -112,11 +116,9 @@ if(Python_Interpreter_FOUND) ) # Set environment variables so Python can find the libraries - # Use generator expressions to get the actual library paths from targets set_tests_properties(test_sparrow_integration PROPERTIES - ENVIRONMENT "TEST_SPARROW_HELPER_LIB_PATH=$;SPARROW_PYCAPSULE_LIB_PATH=$" + ENVIRONMENT "SPARROW_MODULE_PATH=$;TEST_SPARROW_HELPER_LIB_PATH=$" TIMEOUT 300 - DEPENDS test_sparrow_helper ) message(STATUS "Added sparrow integration test (Python ${Python_VERSION})") @@ -135,17 +137,17 @@ if(Python_Interpreter_FOUND) COMMAND ${Python_EXECUTABLE} -c "import pyarrow" || ${CMAKE_COMMAND} -E cmake_echo_color --red "ERROR: pyarrow not installed. Install with: pip install pyarrow" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Library paths:" + COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_MODULE_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo " TEST_SPARROW_HELPER_LIB_PATH=$" - COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Running tests..." 
COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E env + "SPARROW_MODULE_PATH=$" "TEST_SPARROW_HELPER_LIB_PATH=$" - "SPARROW_PYCAPSULE_LIB_PATH=$" ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_sparrow_integration.py COMMAND ${CMAKE_COMMAND} -E echo "" - DEPENDS test_sparrow_helper sparrow-pycapsule + DEPENDS test_sparrow_helper sparrow_rockfinch sparrow-pycapsule WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "Running Sparrow integration tests directly" USES_TERMINAL diff --git a/test/sparrow_helpers.py b/test/sparrow_helpers.py index faa5b8e..e387e96 100644 --- a/test/sparrow_helpers.py +++ b/test/sparrow_helpers.py @@ -11,6 +11,8 @@ import os from pathlib import Path from typing import Any, Protocol, Tuple +import importlib.util + class ArrowArrayExportable(Protocol): """Protocol for objects implementing the Arrow PyCapsule Interface.""" @@ -41,29 +43,35 @@ def from_arrow(cls, arrow_array: ArrowArrayExportable) -> "SparrowArrayType": ... -def _setup_module_path() -> None: - """Add the build directory to Python path so we can import test_sparrow_helper.""" - import importlib.util - - # Check for environment variable first (can be either LIB_PATH or PATH variant) - helper_path = os.environ.get('TEST_SPARROW_HELPER_LIB_PATH') or os.environ.get('TEST_SPARROW_HELPER_PATH') +def _load_module_from_path(module_name: str, file_path: Path): + """Load a Python extension module from a file path.""" + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec and spec.loader: + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + raise ImportError(f"Could not load {module_name} from {file_path}") + + +def _setup_modules() -> None: + """Set up the sparrow_rockfinch and test_sparrow_helper modules.""" + # Load the main sparrow_rockfinch module + sparrow_path = os.environ.get('SPARROW_MODULE_PATH') + if sparrow_path: + sparrow_file = 
Path(sparrow_path) + if sparrow_file.exists(): + _load_module_from_path("sparrow_rockfinch", sparrow_file) + + # Load the test helper module + helper_path = os.environ.get('TEST_SPARROW_HELPER_LIB_PATH') if helper_path: helper_file = Path(helper_path) if helper_file.exists(): - # Load module directly from the given path - spec = importlib.util.spec_from_file_location("test_sparrow_helper", helper_file) - if spec and spec.loader: - module = importlib.util.module_from_spec(spec) - sys.modules["test_sparrow_helper"] = module - spec.loader.exec_module(module) - return - # Also try adding the parent directory to path - module_dir = helper_file.parent - if module_dir.exists(): - sys.path.insert(0, str(module_dir)) + _load_module_from_path("test_sparrow_helper", helper_file) return - - # Try to find in build directory + + # Fallback: try to find modules in build directory test_dir = Path(__file__).parent build_dirs = [ test_dir.parent / "build" / "bin" / "Debug", @@ -77,13 +85,16 @@ def _setup_module_path() -> None: return raise ImportError( - "Could not find test_sparrow_helper module. " - "Build the project first or set TEST_SPARROW_HELPER_LIB_PATH." + "Could not find sparrow_rockfinch or test_sparrow_helper module. " + "Build the project first or set SPARROW_MODULE_PATH and TEST_SPARROW_HELPER_LIB_PATH." 
) -# Set up module path and import the C++ module -_setup_module_path() +# Set up modules +_setup_modules() -# Import the native Python extension module that provides SparrowArray -from test_sparrow_helper import SparrowArray # noqa: E402 +# Import from the sparrow_rockfinch module (try release first, then debug) +try: + from sparrow_rockfinch import SparrowArray # noqa: E402 +except ImportError: + from sparrow_rockfinchd import SparrowArray # noqa: E402 diff --git a/test/test_sparrow_helper_module.cpp b/test/test_sparrow_helper_module.cpp index c22ba5e..c2fc66a 100644 --- a/test/test_sparrow_helper_module.cpp +++ b/test/test_sparrow_helper_module.cpp @@ -1,114 +1,52 @@ +/** + * @file test_sparrow_helper_module.cpp + * @brief Test utilities for sparrow-pycapsule Python integration tests. + * + * This module provides helper functions for creating test arrays in C++. + * The main SparrowArray class is defined in the sparrow module. + */ + #include #include #include -#include #include #include #include -#include #include namespace nb = nanobind; -/** - * Create a test sparrow array with sample data. - */ -sparrow::array create_test_sparrow_array() +namespace { - // Create a test array with nullable integers - std::vector> values = { - sparrow::make_nullable(10, true), - sparrow::make_nullable(20, true), - sparrow::make_nullable(0, false), // null - sparrow::make_nullable(40, true), - sparrow::make_nullable(50, true) - }; - - sparrow::primitive_array prim_array(std::move(values)); - return sparrow::array(std::move(prim_array)); -} - -/** - * Create a SparrowArray from an object implementing __arrow_c_array__. 
- */ -sparrow::pycapsule::SparrowArray create_sparrow_array_from_arrow(nb::object arrow_array) -{ - // Get __arrow_c_array__ method from the input object - if (!nb::hasattr(arrow_array, "__arrow_c_array__")) - { - throw nb::type_error( - "Input object must implement __arrow_c_array__ (ArrowArrayExportable protocol)" - ); - } - - // Call __arrow_c_array__() to get the capsules - nb::object capsules = arrow_array.attr("__arrow_c_array__")(); - - // Unpack the tuple (schema_capsule, array_capsule) - if (!nb::isinstance(capsules) || nb::len(capsules) != 2) + /** + * @brief Create a test sparrow array with sample nullable int32 data. + */ + sparrow::pycapsule::SparrowArray create_test_array() { - throw nb::type_error("__arrow_c_array__ must return a tuple of 2 elements"); + std::vector> values = { + sparrow::make_nullable(10, true), + sparrow::make_nullable(20, true), + sparrow::make_nullable(0, false), // null + sparrow::make_nullable(40, true), + sparrow::make_nullable(50, true) + }; + + sparrow::primitive_array prim_array(std::move(values)); + return sparrow::pycapsule::SparrowArray(sparrow::array(std::move(prim_array))); } - - nb::tuple capsule_tuple = nb::cast(capsules); - PyObject* schema_capsule = capsule_tuple[0].ptr(); - PyObject* array_capsule = capsule_tuple[1].ptr(); - - return sparrow::pycapsule::SparrowArray(schema_capsule, array_capsule); } -NB_MODULE(test_sparrow_helper, m) +NB_MODULE(TEST_SPARROW_HELPER_MODULE_NAME, m) { - m.doc() = "Native Python extension providing SparrowArray type for Arrow data exchange.\n" - "Higher-level helpers are available in sparrow_helpers.py."; - - // Define the SparrowArray class using nanobind - nb::class_(m, "SparrowArray", - "SparrowArray - Arrow array wrapper implementing __arrow_c_array__.\n\n" - "This class wraps a sparrow array and implements the Arrow PyCapsule\n" - "Interface (ArrowArrayExportable protocol), allowing it to be passed\n" - "directly to libraries like Polars via pl.from_arrow().\n\n" - "To create a 
SparrowArray from a PyArrow array, use:\n" - " sparrow_array = SparrowArray.from_arrow(pyarrow_array)") - .def_static("from_arrow", &create_sparrow_array_from_arrow, nb::arg("arrow_array"), - "Construct a SparrowArray from an Arrow-compatible object.\n\n" - "Parameters\n" - "----------\n" - "arrow_array : ArrowArrayExportable\n" - " An object implementing __arrow_c_array__ (e.g., PyArrow array).\n\n" - "Returns\n" - "-------\n" - "SparrowArray\n" - " A new SparrowArray wrapping the input data.") - .def("__arrow_c_array__", [](const sparrow::pycapsule::SparrowArray& self, nb::object /*requested_schema*/) { - auto [schema, array] = self.export_to_capsules(); - // Create a tuple and return ownership to Python - nb::object schema_obj = nb::steal(schema); - nb::object array_obj = nb::steal(array); - return nb::make_tuple(schema_obj, array_obj); - }, nb::arg("requested_schema") = nb::none(), - "Export the array via the Arrow PyCapsule interface.\n\n" - "Parameters\n" - "----------\n" - "requested_schema : object, optional\n" - " Requested schema for the output (typically ignored).\n\n" - "Returns\n" - "-------\n" - "tuple[object, object]\n" - " A tuple of (schema_capsule, array_capsule).") - .def("size", &sparrow::pycapsule::SparrowArray::size, - "Get the number of elements in the array.\n\n" - "Returns\n" - "-------\n" - "int\n" - " The size of the array."); - - // Add the test helper function - m.def("create_test_array", []() { - sparrow::array arr = create_test_sparrow_array(); - return sparrow::pycapsule::SparrowArray(std::move(arr)); - }, "Create a test array and return a SparrowArray object implementing __arrow_c_array__."); + m.doc() = "Test utilities for sparrow-pycapsule integration tests."; + + m.def("create_test_array", &create_test_array, + "Create a test int32 array with values [10, 20, null, 40, 50].\n\n" + "Returns\n" + "-------\n" + "sparrow.SparrowArray\n" + " A SparrowArray for testing purposes."); } diff --git a/test/test_sparrow_integration.py 
b/test/test_sparrow_integration.py index 9e79b57..a271a30 100644 --- a/test/test_sparrow_integration.py +++ b/test/test_sparrow_integration.py @@ -26,8 +26,11 @@ SparrowArrayType, ) -# Import the C++ module for create_test_array -import test_sparrow_helper # noqa: E402 +# Import the C++ module for create_test_array (try release first, then debug) +try: + import test_sparrow_helper # noqa: E402 +except ImportError: + import test_sparrow_helperd as test_sparrow_helper # noqa: E402 def arrow_array_to_series( @@ -86,10 +89,11 @@ def test_sparrow_array_type(self): f"Expected type 'SparrowArray', got '{type_name}'" ) - # Check the module-qualified name (test module, not production module) + # Check the module-qualified name (allow debug 'd' suffix) full_name = f"{type(sparrow_array).__module__}.{type_name}" - assert full_name == "test_sparrow_helper.SparrowArray", ( - f"Expected 'test_sparrow_helper.SparrowArray', got '{full_name}'" + valid_names = ("sparrow_rockfinch.SparrowArray", "sparrow_rockfinchd.SparrowArray") + assert full_name in valid_names, ( + f"Expected one of {valid_names}, got '{full_name}'" ) def test_sparrow_to_polars_series(self): From bc28535e7fc212c0b69a60ad5995533b97d11bd5 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:19:13 +0100 Subject: [PATCH 03/19] Add install --- CMakeLists.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0668c02..a3833d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -262,6 +262,20 @@ if(SPARROW_PYCAPSULE_BUILD_PYTHON_MODULE) set_target_properties(sparrow_rockfinch PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${BINARY_BUILD_DIR} ) + + # Install Python module (for pip install / wheel building) + install(TARGETS sparrow_rockfinch + LIBRARY DESTINATION sparrow_rockfinch + RUNTIME DESTINATION sparrow_rockfinch + ) + # Install __init__.py for the package + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/python/sparrow_rockfinch/__init__.py + DESTINATION 
sparrow_rockfinch + ) + # Install py.typed marker for type hints + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/python/sparrow_rockfinch/py.typed + DESTINATION sparrow_rockfinch + ) endif() # Tests From b9a054c8bad5836b2888e0b203cf9f5369efee65 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:23:30 +0100 Subject: [PATCH 04/19] add nanobind to the dependencies --- environment-dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment-dev.yml b/environment-dev.yml index ea07591..b457707 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -8,6 +8,7 @@ dependencies: # Dependencies - sparrow-devel - python + - nanobind # Tests - doctest - polars From ae5077a7fe1dd85ff6ffa7ae14940244938c133d Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:29:19 +0100 Subject: [PATCH 05/19] add nanobind-abi --- environment-dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment-dev.yml b/environment-dev.yml index b457707..46e68d2 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -9,6 +9,7 @@ dependencies: - sparrow-devel - python - nanobind + - nanobind-abi # Tests - doctest - polars From 4b5addde40c2cd7e88a6bd2464a4c737a689c51f Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:29:30 +0100 Subject: [PATCH 06/19] Run test in the workflow --- .github/workflows/linux.yml | 5 +++++ .github/workflows/osx.yml | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index cc997aa..4c455c7 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -90,6 +90,11 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report + - name: Run sparrow integration tests + if: matrix.build_shared == 'ON' + working-directory: build + run: cmake --build . --target run_sparrow_tests_direct + - name: Install working-directory: build run: sudo cmake --install . 
diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 41dbd32..e5951ac 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -55,10 +55,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report - # - name: Run Sparrow integration tests - # if: matrix.build_shared == 'ON' - # working-directory: build - # run: cmake --build . --target run_sparrow_tests_direct + - name: Run Sparrow integration tests + if: matrix.build_shared == 'ON' + working-directory: build + run: cmake --build . --target run_sparrow_tests_direct - name: Install working-directory: build From 8c60a2e603d3814367e48851681e13200d631ea9 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:40:58 +0100 Subject: [PATCH 07/19] try fix --- .github/workflows/linux.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 4c455c7..f6035a9 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -69,6 +69,14 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Python test dependencies + run: pip install pytest polars pyarrow + - name: Configure using cmake run: | cmake -G Ninja \ From 01dea21c9dea6eaf0328bb97de1f981cf58a3cdb Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:47:54 +0100 Subject: [PATCH 08/19] wip --- .github/workflows/linux.yml | 4 +++- .github/workflows/osx.yml | 2 ++ .github/workflows/windows.yml | 6 ++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index f6035a9..241178b 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -56,6 +56,7 @@ jobs: run: cmake --build . 
--target run_sparrow_tests_direct - name: Install + if: matrix.build_shared == 'ON' working-directory: build run: cmake --install . @@ -70,7 +71,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' @@ -104,5 +105,6 @@ jobs: run: cmake --build . --target run_sparrow_tests_direct - name: Install + if: matrix.build_shared == 'ON' working-directory: build run: sudo cmake --install . diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index e5951ac..4438df8 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -61,6 +61,7 @@ jobs: run: cmake --build . --target run_sparrow_tests_direct - name: Install + if: matrix.build_shared == 'ON' working-directory: build run: cmake --install . @@ -101,5 +102,6 @@ jobs: run: cmake --build . --target run_tests_with_junit_report - name: Install + if: matrix.build_shared == 'ON' working-directory: build run: sudo cmake --install . diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 24a1885..0e563e7 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -19,7 +19,7 @@ jobs: build_shared: [ON, OFF] steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Create build environment uses: mamba-org/setup-micromamba@v2 @@ -61,6 +61,7 @@ jobs: run: cmake --build . --config ${{ matrix.build_type }} --target run_sparrow_tests_direct - name: Install + if: matrix.build_shared == 'ON' working-directory: build run: cmake --install . --config ${{ matrix.build_type }} @@ -72,7 +73,7 @@ jobs: build_shared: [ON, OFF] steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Configure using cmake run: | @@ -95,6 +96,7 @@ jobs: # run: cmake --build . 
--config ${{ matrix.build_type }} --target run_tests_with_junit_report - name: Install + if: matrix.build_shared == 'ON' working-directory: build run: cmake --install . --config ${{ matrix.build_type }} From d92aa5cd4ef2eb9f11932733fdee42d8f30eb763 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 14:56:54 +0100 Subject: [PATCH 09/19] try fix --- cmake/external_dependencies.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/external_dependencies.cmake b/cmake/external_dependencies.cmake index 6d1de08..7f22c95 100644 --- a/cmake/external_dependencies.cmake +++ b/cmake/external_dependencies.cmake @@ -66,6 +66,10 @@ endif() find_package(Python REQUIRED COMPONENTS Interpreter Development.Module Development.Embed) +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) + find_package_or_fetch( PACKAGE_NAME nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind.git From 137f0adf5dffa6b2463804f45a689af5366af44b Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 15:00:43 +0100 Subject: [PATCH 10/19] try fix --- test/sparrow_helpers.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/test/sparrow_helpers.py b/test/sparrow_helpers.py index e387e96..7ac0d6b 100644 --- a/test/sparrow_helpers.py +++ b/test/sparrow_helpers.py @@ -43,8 +43,28 @@ def from_arrow(cls, arrow_array: ArrowArrayExportable) -> "SparrowArrayType": ... +def _get_module_name_from_path(file_path: Path) -> str: + """Extract the module name from a .so/.pyd file path. + + Handles names like 'sparrow_rockfinchd.cpython-312-x86_64-linux-gnu.so' + and extracts 'sparrow_rockfinchd'. 
+ """ + name = file_path.name + # Remove platform suffix (e.g., .cpython-312-x86_64-linux-gnu.so) + if '.cpython-' in name: + name = name.split('.cpython-')[0] + elif name.endswith('.so'): + name = name[:-3] + elif name.endswith('.pyd'): + name = name[:-4] + return name + + def _load_module_from_path(module_name: str, file_path: Path): - """Load a Python extension module from a file path.""" + """Load a Python extension module from a file path. + + The module_name should match the PyInit_ function in the compiled module. + """ spec = importlib.util.spec_from_file_location(module_name, file_path) if spec and spec.loader: module = importlib.util.module_from_spec(spec) @@ -61,14 +81,18 @@ def _setup_modules() -> None: if sparrow_path: sparrow_file = Path(sparrow_path) if sparrow_file.exists(): - _load_module_from_path("sparrow_rockfinch", sparrow_file) + # Use actual module name from file (handles debug 'd' suffix) + module_name = _get_module_name_from_path(sparrow_file) + _load_module_from_path(module_name, sparrow_file) # Load the test helper module helper_path = os.environ.get('TEST_SPARROW_HELPER_LIB_PATH') if helper_path: helper_file = Path(helper_path) if helper_file.exists(): - _load_module_from_path("test_sparrow_helper", helper_file) + # Use actual module name from file (handles debug 'd' suffix) + module_name = _get_module_name_from_path(helper_file) + _load_module_from_path(module_name, helper_file) return # Fallback: try to find modules in build directory From b2baa5e296cc861c4a1194667712e1b2160a282c Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 15:03:26 +0100 Subject: [PATCH 11/19] fix --- python/sparrow_rockfinch/__init__.py | 70 ++++++++++++++++++++++++++++ python/sparrow_rockfinch/py.typed | 0 2 files changed, 70 insertions(+) create mode 100644 python/sparrow_rockfinch/__init__.py create mode 100644 python/sparrow_rockfinch/py.typed diff --git a/python/sparrow_rockfinch/__init__.py b/python/sparrow_rockfinch/__init__.py new file 
mode 100644 index 0000000..ca90c26 --- /dev/null +++ b/python/sparrow_rockfinch/__init__.py @@ -0,0 +1,70 @@ +""" +Sparrow Rockfinch - High-performance Arrow array library for Python. + +This module provides the SparrowArray class which implements the Arrow PyCapsule +Interface for zero-copy data exchange with other Arrow-compatible libraries +like Polars and PyArrow. + +Example +------- +>>> import pyarrow as pa +>>> from sparrow_rockfinch import SparrowArray +>>> +>>> # Create from PyArrow +>>> pa_array = pa.array([1, 2, None, 4, 5]) +>>> sparrow_array = SparrowArray.from_arrow(pa_array) +>>> +>>> # Use with Polars (zero-copy) +>>> import polars as pl +>>> series = pl.from_arrow(sparrow_array) +""" + +from __future__ import annotations + +from typing import Any, Protocol, Tuple, TYPE_CHECKING + +# Import the compiled extension module +# The actual module name matches the compiled .so file +try: + from sparrow_rockfinch.sparrow_rockfinch import SparrowArray +except ImportError: + # Fallback for development builds where module might be at top level + try: + from .sparrow_rockfinch import SparrowArray + except ImportError: + # Last resort: try importing from parent (editable installs) + from sparrow_rockfinch import SparrowArray as _SparrowArray + SparrowArray = _SparrowArray + + +class ArrowArrayExportable(Protocol): + """Protocol for objects implementing the Arrow PyCapsule Interface. + + Any object implementing this protocol can be used to create a SparrowArray. + This includes PyArrow arrays, Polars Series, and other Arrow-compatible types. + """ + + def __arrow_c_array__( + self, requested_schema: Any = None + ) -> Tuple[Any, Any]: + """Export the array as Arrow PyCapsules. + + Parameters + ---------- + requested_schema : object, optional + The requested schema for the export (typically None). + + Returns + ------- + Tuple[object, object] + A tuple of (schema_capsule, array_capsule). + """ + ... 
+ + +__all__ = [ + "SparrowArray", + "ArrowArrayExportable", +] + +__version__ = "0.1.0" diff --git a/python/sparrow_rockfinch/py.typed b/python/sparrow_rockfinch/py.typed new file mode 100644 index 0000000..e69de29 From 535b12379a8615388e8589325f72797d583584a8 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 15:44:20 +0100 Subject: [PATCH 12/19] fix --- .github/workflows/osx.yml | 5 +++++ test/sparrow_helpers.py | 17 ++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 4438df8..094f265 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -101,6 +101,11 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report + - name: Run sparrow integration tests + if: matrix.build_shared == 'ON' + working-directory: build + run: cmake --build . --target run_sparrow_tests_direct + - name: Install if: matrix.build_shared == 'ON' working-directory: build diff --git a/test/sparrow_helpers.py b/test/sparrow_helpers.py index 7ac0d6b..78dfaed 100644 --- a/test/sparrow_helpers.py +++ b/test/sparrow_helpers.py @@ -46,17 +46,16 @@ def from_arrow(cls, arrow_array: ArrowArrayExportable) -> "SparrowArrayType": def _get_module_name_from_path(file_path: Path) -> str: """Extract the module name from a .so/.pyd file path. - Handles names like 'sparrow_rockfinchd.cpython-312-x86_64-linux-gnu.so' - and extracts 'sparrow_rockfinchd'. 
+ Handles various naming patterns: + - Linux: 'sparrow_rockfinchd.cpython-312-x86_64-linux-gnu.so' -> 'sparrow_rockfinchd' + - Windows: 'sparrow_rockfinch.cp314-win_amd64.pyd' -> 'sparrow_rockfinch' + - Simple: 'module.so' or 'module.pyd' -> 'module' """ name = file_path.name - # Remove platform suffix (e.g., .cpython-312-x86_64-linux-gnu.so) - if '.cpython-' in name: - name = name.split('.cpython-')[0] - elif name.endswith('.so'): - name = name[:-3] - elif name.endswith('.pyd'): - name = name[:-4] + # The module name is always the part before the first dot + # This handles all patterns: name.cpython-..., name.cp314-..., name.so, name.pyd + if '.' in name: + name = name.split('.')[0] return name From 9a377ae2e699417b1f079cf24cfdec8e753c7a53 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 15:49:51 +0100 Subject: [PATCH 13/19] fix --- test/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a591d7f..8903be8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -127,6 +127,9 @@ else() endif() if(Python_Interpreter_FOUND) + # Get the directory containing the sparrow-pycapsule library for runtime + set(SPARROW_LIB_DIR "$") + add_custom_target(run_sparrow_tests_direct COMMAND ${CMAKE_COMMAND} -E echo "==================================" COMMAND ${CMAKE_COMMAND} -E echo "Sparrow Integration Test Runner" @@ -139,12 +142,15 @@ if(Python_Interpreter_FOUND) COMMAND ${CMAKE_COMMAND} -E echo "Library paths:" COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_MODULE_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo " TEST_SPARROW_HELPER_LIB_PATH=$" + COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_LIB_DIR=${SPARROW_LIB_DIR}" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Running tests..." 
COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E env "SPARROW_MODULE_PATH=$" "TEST_SPARROW_HELPER_LIB_PATH=$" + "LD_LIBRARY_PATH=${SPARROW_LIB_DIR}:$ENV{LD_LIBRARY_PATH}" + "DYLD_LIBRARY_PATH=${SPARROW_LIB_DIR}:$ENV{DYLD_LIBRARY_PATH}" ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_sparrow_integration.py COMMAND ${CMAKE_COMMAND} -E echo "" DEPENDS test_sparrow_helper sparrow_rockfinch sparrow-pycapsule From 3f052961b44a4f5a6e5a95971812b7d9dfba9c87 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 15:53:19 +0100 Subject: [PATCH 14/19] try fix --- CMakeLists.txt | 3 +++ test/CMakeLists.txt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3833d1..6891ed0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -261,6 +261,9 @@ if(SPARROW_PYCAPSULE_BUILD_PYTHON_MODULE) ) set_target_properties(sparrow_rockfinch PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${BINARY_BUILD_DIR} + # Set RPATH so the module can find libsparrow-pycapsule at runtime + BUILD_RPATH "$" + INSTALL_RPATH "$ORIGIN;@loader_path" ) # Install Python module (for pip install / wheel building) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8903be8..90b1341 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -101,6 +101,9 @@ target_compile_definitions(test_sparrow_helper PRIVATE set_target_properties(test_sparrow_helper PROPERTIES FOLDER tests LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} + # Set RPATH so the module can find libsparrow-pycapsule at runtime + BUILD_RPATH "$" + INSTALL_RPATH "$ORIGIN;@loader_path" ) # Python integration test From 53fcd77af36bdf0425981aa76ef7c939513e50fa Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 16:05:31 +0100 Subject: [PATCH 15/19] try fix --- .github/workflows/windows.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0e563e7..7b37e74 
100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -90,10 +90,14 @@ jobs: working-directory: build run: cmake --build . --config ${{ matrix.build_type }} --target test_sparrow_pycapsule_lib - # TODO: This crashes in the CI - #- name: Run tests - # working-directory: build - # run: cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report + - name: Run tests + working-directory: build + run: cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report + + - name: Run Sparrow integration tests + if: matrix.build_shared == 'ON' + working-directory: build + run: cmake --build . --config ${{ matrix.build_type }} --target run_sparrow_tests_direct - name: Install if: matrix.build_shared == 'ON' From 4607b2adf18abc397c2eceaf8588e31dfb0a9078 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 16:11:11 +0100 Subject: [PATCH 16/19] try fix --- .github/workflows/osx.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 094f265..cfe2cf1 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -80,6 +80,14 @@ jobs: sudo xcode-select --switch /Applications/Xcode_16.4.app/Contents/Developer xcodebuild -version + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Install Python test dependencies + run: pip install pytest polars pyarrow + - name: Configure using cmake run: | cmake -G Ninja \ From e529b9b41c9af592322f542a284eb9091a4f41e3 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 16:22:39 +0100 Subject: [PATCH 17/19] ttry --- .github/workflows/osx.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index cfe2cf1..e5c1cf7 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -95,7 +95,8 @@ jobs: -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} 
\ -DSPARROW_PYCAPSULE_BUILD_SHARED=${{ matrix.build_shared }} \ -DSPARROW_PYCAPSULE_BUILD_TESTS=ON \ - -DFETCH_DEPENDENCIES_WITH_CMAKE=MISSING + -DFETCH_DEPENDENCIES_WITH_CMAKE=MISSING \ + -DPython_EXECUTABLE=$(which python3) - name: Build sparrow-pycapsule working-directory: build From b5212bcf4476fed40f56daf4b002e86be7869ae7 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 4 Dec 2025 16:36:57 +0100 Subject: [PATCH 18/19] try fixc --- CMakeLists.txt | 4 +++- test/CMakeLists.txt | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6891ed0..8c85344 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -263,7 +263,9 @@ if(SPARROW_PYCAPSULE_BUILD_PYTHON_MODULE) LIBRARY_OUTPUT_DIRECTORY ${BINARY_BUILD_DIR} # Set RPATH so the module can find libsparrow-pycapsule at runtime BUILD_RPATH "$" - INSTALL_RPATH "$ORIGIN;@loader_path" + BUILD_RPATH_USE_ORIGIN ON + MACOSX_RPATH ON + INSTALL_RPATH "@loader_path" ) # Install Python module (for pip install / wheel building) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 90b1341..53bab02 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -103,7 +103,9 @@ set_target_properties(test_sparrow_helper PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} # Set RPATH so the module can find libsparrow-pycapsule at runtime BUILD_RPATH "$" - INSTALL_RPATH "$ORIGIN;@loader_path" + BUILD_RPATH_USE_ORIGIN ON + MACOSX_RPATH ON + INSTALL_RPATH "@loader_path" ) # Python integration test @@ -146,6 +148,10 @@ if(Python_Interpreter_FOUND) COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_MODULE_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo " TEST_SPARROW_HELPER_LIB_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_LIB_DIR=${SPARROW_LIB_DIR}" + COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB=$" + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "Checking library exists..." 
+ COMMAND ls -la ${SPARROW_LIB_DIR} COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Running tests..." COMMAND ${CMAKE_COMMAND} -E echo "" From c570fecc5d832f4d5082d91c0d60ed45d335cbf8 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 5 Dec 2025 13:36:39 +0100 Subject: [PATCH 19/19] fix example --- src/sparrow_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparrow_module.cpp b/src/sparrow_module.cpp index 8c6016b..34aa2f0 100644 --- a/src/sparrow_module.cpp +++ b/src/sparrow_module.cpp @@ -63,7 +63,7 @@ namespace sparrow::pycapsule "Example\n" "-------\n" ">>> import pyarrow as pa\n" - ">>> import sparrow\n" + ">>> import sparrow_rockfinch as sparrow\n" ">>> pa_array = pa.array([1, 2, None, 4])\n" ">>> sparrow_array = sparrow.SparrowArray.from_arrow(pa_array)") .def_static("from_arrow", &sparrow_array_from_arrow,