From b1d6f6b9638adf4768412f5a9b874f42a3128c06 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 13:25:05 +0100 Subject: [PATCH 01/18] Add polars tests --- README.md | 280 +++++++++++++++++++++- src/pycapsule.cpp | 87 ++++++- test/CMakeLists.txt | 111 +++++++++ test/check_deps.py | 21 ++ test/test_library_load.py | 122 ++++++++++ test/test_polars_helper.cpp | 186 +++++++++++++++ test/test_polars_integration.py | 396 ++++++++++++++++++++++++++++++++ 7 files changed, 1193 insertions(+), 10 deletions(-) create mode 100644 test/check_deps.py create mode 100644 test/test_library_load.py create mode 100644 test/test_polars_helper.cpp create mode 100644 test/test_polars_integration.py diff --git a/README.md b/README.md index a3bf936..c857321 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,280 @@ # sparrow-pycapsule -The Sparrow PyCapsuleInterface + +The Sparrow PyCapsule Interface - A C++ library for exchanging Apache Arrow data between C++ and Python using the Arrow C Data Interface via PyCapsules. 
+ +## Overview + +`sparrow-pycapsule` provides a clean C++ API for: +- Exporting sparrow arrays to Python as PyCapsules (Arrow C Data Interface) +- Importing Arrow data from Python PyCapsules into sparrow arrays +- Zero-copy data exchange with Python libraries like Polars, PyArrow, and pandas + +## Features + +- ✅ **Zero-copy data exchange** between C++ and Python +- ✅ **Arrow C Data Interface** compliant +- ✅ **PyCapsule-based** for safe memory management +- ✅ **Compatible with Polars, PyArrow, pandas** and other Arrow-based libraries +- ✅ **Bidirectional** data flow (C++ ↔ Python) +- ✅ **Type-safe** with proper ownership semantics + +## Building + +### Prerequisites + +```bash +# Using conda (recommended) +conda env create -f environment-dev.yml +conda activate sparrow-pycapsule + +# Or install manually +# - CMake >= 3.28 +# - C++20 compiler +# - Python 3.x with development headers +# - sparrow library +``` + +### Build Instructions + +```bash +mkdir build && cd build +cmake .. -DCMAKE_BUILD_TYPE=Release +cmake --build . +``` + +### Build with Tests + +```bash +mkdir build && cd build +cmake .. -DSPARROW_PYCAPSULE_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Debug +cmake --build . +ctest --output-on-failure +``` + +## Usage Example + +### C++ Side: Exporting Data + +```cpp +#include +#include + +// Create a sparrow array +sparrow::array my_array = /* ... */; + +// Export to PyCapsules for Python consumption +auto [schema_capsule, array_capsule] = + sparrow::pycapsule::export_array_to_capsules(my_array); + +// Pass capsules to Python (via Python C API, pybind11, etc.) 
+``` + +### Python Side: Consuming C++ Data + +```python +import polars as pl +import pyarrow as pa + +# Receive capsules from C++ +# schema_capsule, array_capsule = get_from_cpp() + +# Import into PyArrow +arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) + +# Convert to Polars +series = pl.from_arrow(arrow_array) + +# Use in Polars DataFrame +df = pl.DataFrame({"my_column": series}) +``` + +### Python Side: Exporting to C++ + +```python +import polars as pl + +# Create Polars data +series = pl.Series([1, 2, None, 4, 5]) + +# Convert to Arrow and export as capsules +arrow_array = series.to_arrow() +schema_capsule, array_capsule = arrow_array.__arrow_c_array__() + +# Pass to C++ +``` + +### C++ Side: Importing from Python + +```cpp +#include + +// Receive capsules from Python +PyObject* schema_capsule = /* ... */; +PyObject* array_capsule = /* ... */; + +// Import into sparrow array +sparrow::array imported_array = + sparrow::pycapsule::import_array_from_capsules( + schema_capsule, array_capsule); + +// Use the array +std::cout << "Array size: " << imported_array.size() << std::endl; +``` + +## Testing + +### C++ Unit Tests + +```bash +cd build +./bin/Debug/test_sparrow_pycapsule_lib +``` + +### Polars Integration Tests + +Test bidirectional data exchange with Polars: + +```bash +# Run via CMake target (recommended) +cd build +cmake --build . --target run_polars_tests + +# Or with direct execution (better output) +cmake --build . --target run_polars_tests_direct + +# Or via CTest +ctest -R test_polars_integration --output-on-failure + +# Check dependencies first +cmake --build . --target check_polars_deps + +# Or run Python script directly +cd test +python test_polars_integration.py +``` + +See [test/README_POLARS_TESTS.md](test/README_POLARS_TESTS.md) for detailed documentation. 
+ +## CMake Targets + +The project provides several convenient CMake targets for testing: + +| Target | Description | +|--------|-------------| +| `run_tests` | Run all C++ unit tests | +| `run_tests_with_junit_report` | Run C++ tests with JUnit XML output | +| `run_polars_tests` | Run Polars integration test via CTest | +| `run_polars_tests_direct` | Run Polars test directly (recommended, better output) | +| `check_polars_deps` | Check Python dependencies (polars, pyarrow) | +| `debug_polars_tests` | Run Polars tests with verbose debugging output | +| `test_library_load` | Minimal test to debug library loading issues | + +**Usage:** +```bash +cd build + +# Run C++ tests +cmake --build . --target run_tests + +# Run Polars integration tests +cmake --build . --target run_polars_tests_direct + +# Check dependencies first +cmake --build . --target check_polars_deps + +# Or use CTest to run all tests +ctest --output-on-failure +``` + +### Debugging Test Failures + +If you encounter segmentation faults or other issues: + +```bash +cd build + +# Run minimal library loading test (step-by-step debugging) +cmake --build . --target test_library_load + +# Run with full debugging output +cmake --build . --target debug_polars_tests + +# Check that libraries exist and dependencies are correct +cmake --build . 
--target check_polars_deps +``` + +## API Reference + +### Export Functions + +- `export_arrow_schema_pycapsule(array& arr)` - Export schema to PyCapsule +- `export_arrow_array_pycapsule(array& arr)` - Export array data to PyCapsule +- `export_array_to_capsules(array& arr)` - Export both schema and array (recommended) + +### Import Functions + +- `get_arrow_schema_pycapsule(PyObject* capsule)` - Get ArrowSchema pointer from capsule +- `get_arrow_array_pycapsule(PyObject* capsule)` - Get ArrowArray pointer from capsule +- `import_array_from_capsules(PyObject* schema, PyObject* array)` - Import complete array + +### Memory Management + +- `release_arrow_schema_pycapsule(PyObject* capsule)` - PyCapsule destructor for schema +- `release_arrow_array_pycapsule(PyObject* capsule)` - PyCapsule destructor for array + +All capsules have destructors that properly clean up Arrow structures. + +## Supported Data Types + +The library supports all Arrow data types that sparrow supports: +- Integer types (Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64) +- Floating point (Float32, Float64) +- Boolean +- String (UTF-8) +- And more... + +All types support nullable values via the Arrow null bitmap. + +## Integration with Python Libraries + +### Polars +```python +series = pl.Series([1, 2, 3]) +arrow_array = series.to_arrow() +schema_capsule, array_capsule = arrow_array.__arrow_c_array__() +# Pass to C++ +``` + +### PyArrow +```python +arrow_array = pa.array([1, 2, 3]) +schema_capsule, array_capsule = arrow_array.__arrow_c_array__() +# Pass to C++ +``` + +### pandas (via PyArrow) +```python +import pandas as pd +series = pd.Series([1, 2, 3]) +arrow_array = pa.Array.from_pandas(series) +schema_capsule, array_capsule = arrow_array.__arrow_c_array__() +# Pass to C++ +``` + +## License + +See [LICENSE](LICENSE) file for details. + +## Contributing + +Contributions are welcome! 
Please ensure: +- Code follows the existing style +- All tests pass (`ctest --output-on-failure`) +- New features include tests +- Documentation is updated + +## Related Projects + +- [sparrow](https://github.com/man-group/sparrow) - Modern C++ library for Apache Arrow +- [Apache Arrow](https://arrow.apache.org/) - Cross-language development platform +- [Polars](https://www.pola.rs/) - Fast DataFrame library diff --git a/src/pycapsule.cpp b/src/pycapsule.cpp index 5d14c7b..989540f 100644 --- a/src/pycapsule.cpp +++ b/src/pycapsule.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -38,8 +39,12 @@ namespace sparrow::pycapsule ArrowSchema* arrow_schema_ptr = new ArrowSchema(); *arrow_schema_ptr = extract_arrow_schema(std::move(arr)); - PyObject* capsule_ptr = PyCapsule_New(arrow_schema_ptr, arrow_schema_str.data(), release_arrow_schema_pycapsule); - if(capsule_ptr == nullptr) + PyObject* capsule_ptr = PyCapsule_New( + arrow_schema_ptr, + arrow_schema_str.data(), + release_arrow_schema_pycapsule + ); + if (capsule_ptr == nullptr) { arrow_schema_ptr->release(arrow_schema_ptr); delete arrow_schema_ptr; @@ -77,8 +82,12 @@ namespace sparrow::pycapsule ArrowArray* arrow_array_ptr = new ArrowArray(); *arrow_array_ptr = extract_arrow_array(std::move(arr)); - PyObject* capsule_ptr = PyCapsule_New(arrow_array_ptr, arrow_array_str.data(), release_arrow_array_pycapsule); - if(capsule_ptr == nullptr) + PyObject* capsule_ptr = PyCapsule_New( + arrow_array_ptr, + arrow_array_str.data(), + release_arrow_array_pycapsule + ); + if (capsule_ptr == nullptr) { arrow_array_ptr->release(arrow_array_ptr); delete arrow_array_ptr; @@ -130,16 +139,76 @@ namespace sparrow::pycapsule ArrowSchema* schema_ptr = new ArrowSchema(std::move(arrow_schema)); ArrowArray* array_ptr = new ArrowArray(std::move(arrow_array)); - PyObject* schema_capsule = PyCapsule_New(schema_ptr, arrow_schema_str.data(), release_arrow_schema_pycapsule); - if (!schema_capsule) { + // Check if Python is initialized before 
creating capsules + if (!Py_IsInitialized()) + { delete schema_ptr; delete array_ptr; - return {nullptr, nullptr}; + throw std::runtime_error("Python is not initialized. Cannot create PyCapsules."); } - PyObject* array_capsule = PyCapsule_New(array_ptr, arrow_array_str.data(), release_arrow_array_pycapsule); - if (!array_capsule) { + + PyObject* schema_capsule = PyCapsule_New( + schema_ptr, + arrow_schema_str.data(), + release_arrow_schema_pycapsule + ); + + if (!schema_capsule) + { + // Check for Python error + if (PyErr_Occurred()) + { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + PyObject* str_value = PyObject_Str(value); + const char* error_msg = str_value ? PyUnicode_AsUTF8(str_value) : "Unknown error"; + + std::string error_str = std::string("PyCapsule_New failed for schema: ") + error_msg; + + Py_XDECREF(str_value); + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + + delete schema_ptr; + delete array_ptr; + throw std::runtime_error(error_str); + } delete schema_ptr; delete array_ptr; + return {nullptr, nullptr}; + } + + PyObject* array_capsule = PyCapsule_New( + array_ptr, + arrow_array_str.data(), + release_arrow_array_pycapsule + ); + + if (!array_capsule) + { + // Check for Python error + if (PyErr_Occurred()) + { + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + PyObject* str_value = PyObject_Str(value); + const char* error_msg = str_value ? 
PyUnicode_AsUTF8(str_value) : "Unknown error"; + + std::string error_str = std::string("PyCapsule_New failed for array: ") + error_msg; + + Py_XDECREF(str_value); + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + + delete array_ptr; + Py_DECREF(schema_capsule); + throw std::runtime_error(error_str); + } + delete array_ptr; Py_DECREF(schema_capsule); return {nullptr, nullptr}; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0e7353f..9cfb481 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -84,3 +84,114 @@ add_custom_target(run_tests_with_junit_report ) set_target_properties(run_tests_with_junit_report PROPERTIES FOLDER "Tests utilities") + +# Polars integration test helper library +# ======================================= +add_library(test_polars_helper SHARED test_polars_helper.cpp) + +target_link_libraries(test_polars_helper + PUBLIC + sparrow-pycapsule + sparrow::sparrow + Python::Python +) + +target_compile_features(test_polars_helper PRIVATE cxx_std_20) + +if(MSVC) + target_compile_options(test_polars_helper PRIVATE /W4) +else() + target_compile_options(test_polars_helper PRIVATE -Wall -Wextra -Wpedantic) +endif() + +set_target_properties(test_polars_helper PROPERTIES + FOLDER tests + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} +) + +# Python integration test +# ======================= +find_package(Python COMPONENTS Interpreter QUIET) + +if(Python_Interpreter_FOUND) + # Add a test that runs the Python integration script + add_test( + NAME test_polars_integration + COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_polars_integration.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + + # Set environment variables so Python can find the libraries + # Use generator expressions to get the actual library paths from targets + set_tests_properties(test_polars_integration PROPERTIES + ENVIRONMENT "TEST_POLARS_HELPER_LIB_PATH=$;SPARROW_PYCAPSULE_LIB_PATH=$" + TIMEOUT 300 + DEPENDS 
test_polars_helper + ) + + message(STATUS "Added Polars integration test (Python ${Python_VERSION})") +else() + message(WARNING "Python interpreter not found, skipping Polars integration test") +endif() + +# Custom target to run Polars tests directly (with better output) +if(Python_Interpreter_FOUND) + add_custom_target(run_polars_tests_direct + COMMAND ${CMAKE_COMMAND} -E echo "==================================" + COMMAND ${CMAKE_COMMAND} -E echo "Polars Integration Test Runner" + COMMAND ${CMAKE_COMMAND} -E echo "==================================" + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "Checking Python dependencies..." + COMMAND ${Python_EXECUTABLE} -c "import polars" || ${CMAKE_COMMAND} -E cmake_echo_color --red "ERROR: polars not installed. Install with: pip install polars" + COMMAND ${Python_EXECUTABLE} -c "import pyarrow" || ${CMAKE_COMMAND} -E cmake_echo_color --red "ERROR: pyarrow not installed. Install with: pip install pyarrow" + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "Library paths:" + COMMAND ${CMAKE_COMMAND} -E echo " TEST_POLARS_HELPER_LIB_PATH=$" + COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB_PATH=$" + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "Running tests..." + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E env + "TEST_POLARS_HELPER_LIB_PATH=$" + "SPARROW_PYCAPSULE_LIB_PATH=$" + ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_polars_integration.py + COMMAND ${CMAKE_COMMAND} -E echo "" + DEPENDS test_polars_helper sparrow-pycapsule + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Running Polars integration tests directly" + USES_TERMINAL + ) + + set_target_properties(run_polars_tests_direct PROPERTIES FOLDER "Tests utilities") + + # Custom target to check Polars dependencies + add_custom_target(check_polars_deps + COMMAND ${CMAKE_COMMAND} -E echo "Checking Polars integration test dependencies..." 
+ COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/check_deps.py + COMMAND ${CMAKE_COMMAND} -E echo "Environment variables that will be set:" + COMMAND ${CMAKE_COMMAND} -E echo " TEST_POLARS_HELPER_LIB_PATH=$" + COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB_PATH=$" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Checking Polars test dependencies" + USES_TERMINAL + ) + + set_target_properties(check_polars_deps PROPERTIES FOLDER "Tests utilities") + + # Minimal library loading test for debugging segfaults + add_custom_target(test_library_load + COMMAND ${CMAKE_COMMAND} -E env + "TEST_POLARS_HELPER_LIB_PATH=$" + "SPARROW_PYCAPSULE_LIB_PATH=$" + "PYTHONUNBUFFERED=1" + ${Python_EXECUTABLE} -u ${CMAKE_CURRENT_SOURCE_DIR}/test_library_load.py + DEPENDS + test_polars_helper + sparrow-pycapsule + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Testing library loading step-by-step" + USES_TERMINAL + ) + + set_target_properties(test_library_load PROPERTIES FOLDER "Tests utilities") +endif() diff --git a/test/check_deps.py b/test/check_deps.py new file mode 100644 index 0000000..e082ecc --- /dev/null +++ b/test/check_deps.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +"""Check if required Python dependencies are installed.""" + +import sys + +try: + import polars + print(f"polars version: {polars.__version__}") +except ImportError: + print("ERROR: polars not installed") + sys.exit(1) + +try: + import pyarrow as pa + print(f"pyarrow version: {pa.__version__}") +except ImportError: + print("ERROR: pyarrow not installed") + sys.exit(1) + +print("\nAll dependencies installed!") +sys.exit(0) diff --git a/test/test_library_load.py b/test/test_library_load.py new file mode 100644 index 0000000..175bf2d --- /dev/null +++ b/test/test_library_load.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +""" +Minimal test to debug library loading issues. +This script tests each step individually to identify where segfaults occur. 
+""" + +import sys +import os +import ctypes +from pathlib import Path + +def step(num, description): + """Print a test step.""" + print(f"\n{'='*60}") + print(f"Step {num}: {description}") + print('='*60) + +def main(): + print("\n" + "="*60) + print("Library Loading Debug Test") + print("="*60) + + try: + # Step 1: Check environment variables + step(1, "Checking environment variables") + helper_path = os.environ.get('TEST_POLARS_HELPER_LIB_PATH') + main_path = os.environ.get('SPARROW_PYCAPSULE_LIB_PATH') + + print(f"TEST_POLARS_HELPER_LIB_PATH: {helper_path}") + print(f"SPARROW_PYCAPSULE_LIB_PATH: {main_path}") + + if not helper_path: + print("ERROR: TEST_POLARS_HELPER_LIB_PATH not set") + return 1 + + if not main_path: + print("ERROR: SPARROW_PYCAPSULE_LIB_PATH not set") + return 1 + + # Step 2: Check files exist + step(2, "Checking library files exist") + helper_file = Path(helper_path) + main_file = Path(main_path) + + print(f"Helper library exists: {helper_file.exists()} ({helper_file})") + print(f"Main library exists: {main_file.exists()} ({main_file})") + + if not helper_file.exists(): + print(f"ERROR: Helper library not found at {helper_file}") + return 1 + + if not main_file.exists(): + print(f"ERROR: Main library not found at {main_file}") + return 1 + + # Step 3: Test ctypes import + step(3, "Testing ctypes module") + print("ctypes imported successfully") + print(f"ctypes.CDLL: {ctypes.CDLL}") + + # Step 4: Try loading main library + step(4, "Loading sparrow-pycapsule library") + try: + main_lib = ctypes.CDLL(str(main_file)) + print("✓ Main library loaded successfully") + except Exception as e: + print(f"✗ Failed to load main library: {e}") + return 1 + + # Step 5: Try loading helper library + step(5, "Loading test_polars_helper library") + try: + helper_lib = ctypes.CDLL(str(helper_file)) + print("✓ Helper library loaded successfully") + except Exception as e: + print(f"✗ Failed to load helper library: {e}") + return 1 + + # Step 6: Check if 
init_python exists + step(6, "Checking for init_python function") + try: + if hasattr(helper_lib, 'init_python'): + print("✓ init_python function found") + else: + print("✗ init_python function not found") + print(f"Available attributes: {dir(helper_lib)}") + return 1 + except Exception as e: + print(f"✗ Error checking for init_python: {e}") + return 1 + + # Step 7: Call init_python + step(7, "Calling init_python()") + print("About to call init_python()...") + sys.stdout.flush() + + try: + helper_lib.init_python() + print("✓ init_python() called successfully") + except Exception as e: + print(f"✗ init_python() failed: {e}") + return 1 + + # Step 8: Check Python state + step(8, "Checking Python interpreter state") + import sys as sys2 + print(f"Python version: {sys2.version}") + print(f"Python initialized: {sys2.version_info}") + + print("\n" + "="*60) + print("✓ ALL STEPS COMPLETED SUCCESSFULLY") + print("="*60) + return 0 + + except Exception as e: + print(f"\n✗ EXCEPTION: {e}") + import traceback + traceback.print_exc() + return 1 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/test/test_polars_helper.cpp b/test/test_polars_helper.cpp new file mode 100644 index 0000000..7a08009 --- /dev/null +++ b/test/test_polars_helper.cpp @@ -0,0 +1,186 @@ +/** + * @file test_polars_helper.cpp + * @brief C++ helper library for Polars integration tests. + * + * This library provides C functions that can be called from Python via ctypes + * to test the bidirectional data exchange between Polars and sparrow. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +// Export C API functions for ctypes +extern "C" +{ + /** + * @brief Initialize Python interpreter if not already initialized. + * + * Note: When called from Python (via ctypes), Python is already initialized. + * This function only initializes if called from pure C++ context. 
+ */ + void init_python() + { + // When called from Python via ctypes, Python is already initialized + // So this check should always be true, and we do nothing + if (Py_IsInitialized()) + { + // Python already initialized - this is the normal case when called from Python + return; + } + + // Only initialize if we're being called from C++ without Python + Py_Initialize(); + } + + /** + * @brief Create a simple test array and return raw Arrow C pointers. + * + * Instead of creating PyCapsules in C++, we return raw pointers that Python + * will wrap in PyCapsules. This avoids Python C API calls from ctypes libraries. + * + * @param schema_ptr_out Output parameter for ArrowSchema pointer + * @param array_ptr_out Output parameter for ArrowArray pointer + * @return 0 on success, -1 on error + */ + int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out) + { + try + { + // Create a test array with nullable integers + std::vector> values = { + sparrow::make_nullable(10, true), + sparrow::make_nullable(20, true), + sparrow::make_nullable(0, false), // null + sparrow::make_nullable(40, true), + sparrow::make_nullable(50, true) + }; + + sparrow::primitive_array prim_array(std::move(values)); + sparrow::array arr(std::move(prim_array)); + + // Extract Arrow C structures + auto [arrow_array, arrow_schema] = sparrow::extract_arrow_structures(std::move(arr)); + + // Allocate on heap and transfer ownership to Python + ArrowSchema* schema_ptr = new ArrowSchema(std::move(arrow_schema)); + ArrowArray* array_ptr = new ArrowArray(std::move(arrow_array)); + + *schema_ptr_out = schema_ptr; + *array_ptr_out = array_ptr; + + return 0; + } + catch (const std::exception& e) + { + std::cerr << "Exception in create_test_array_as_pointers: " << e.what() << std::endl; + return -1; + } + } + + /** + * @brief Import array from raw Arrow C pointers and return new pointers. 
+ * + * @param schema_ptr_in Input ArrowSchema pointer + * @param array_ptr_in Input ArrowArray pointer + * @param schema_ptr_out Output ArrowSchema pointer + * @param array_ptr_out Output ArrowArray pointer + * @return 0 on success, -1 on error + */ + int + roundtrip_array_pointers(void* schema_ptr_in, void* array_ptr_in, void** schema_ptr_out, void** array_ptr_out) + { + try + { + if (schema_ptr_in == nullptr || array_ptr_in == nullptr) + { + std::cerr << "Null input pointers" << std::endl; + return -1; + } + + ArrowSchema* schema_in = static_cast(schema_ptr_in); + ArrowArray* array_in = static_cast(array_ptr_in); + + // Move the data (mark originals as released to prevent double-free) + ArrowSchema schema_moved = *schema_in; + ArrowArray array_moved = *array_in; + schema_in->release = nullptr; + array_in->release = nullptr; + + // Import into sparrow + sparrow::array arr(std::move(array_moved), std::move(schema_moved)); + + std::cout << "Roundtrip array size: " << arr.size() << std::endl; + + // Export back out + auto [arrow_array_out, arrow_schema_out] = sparrow::extract_arrow_structures(std::move(arr)); + + ArrowSchema* schema_out = new ArrowSchema(std::move(arrow_schema_out)); + ArrowArray* array_out = new ArrowArray(std::move(arrow_array_out)); + + *schema_ptr_out = schema_out; + *array_ptr_out = array_out; + + return 0; + } + catch (const std::exception& e) + { + std::cerr << "Exception in roundtrip_array_pointers: " << e.what() << std::endl; + return -1; + } + } + + /** + * @brief Verify that Arrow C structures have the expected size. 
+ * + * @param schema_ptr ArrowSchema pointer + * @param array_ptr ArrowArray pointer + * @param expected_size Expected array size + * @return 0 if size matches, -1 otherwise + */ + int verify_array_size_from_pointers(void* schema_ptr, void* array_ptr, size_t expected_size) + { + try + { + if (schema_ptr == nullptr || array_ptr == nullptr) + { + std::cerr << "Null pointers provided" << std::endl; + return -1; + } + + ArrowSchema* schema = static_cast(schema_ptr); + ArrowArray* array = static_cast(array_ptr); + + // Move the data (mark originals as released) + ArrowSchema schema_moved = *schema; + ArrowArray array_moved = *array; + schema->release = nullptr; + array->release = nullptr; + + sparrow::array arr(std::move(array_moved), std::move(schema_moved)); + + if (arr.size() == expected_size) + { + std::cout << "Array size verified: " << arr.size() << std::endl; + return 0; + } + else + { + std::cerr << "Size mismatch: expected " << expected_size << ", got " << arr.size() << std::endl; + return -1; + } + } + catch (const std::exception& e) + { + std::cerr << "Exception in verify_array_size_from_pointers: " << e.what() << std::endl; + return -1; + } + } +} diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py new file mode 100644 index 0000000..fdd8e39 --- /dev/null +++ b/test/test_polars_integration.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +""" +Integration test for sparrow-pycapsule with Polars. + +This test demonstrates bidirectional data exchange between sparrow (C++) and Polars (Python) +using the Arrow C Data Interface. The C++ library returns raw pointers, and Python creates +the PyCapsules to avoid Python C API calls from ctypes-loaded libraries. 
+""" + +import sys +import ctypes +import os +from pathlib import Path +import polars as pl +import pyarrow as pa + +# Set RTLD_GLOBAL and RTLD_NOW flags before loading any libraries +# This ensures that symbols are shared globally +if hasattr(sys, 'setdlopenflags'): + sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_NOW) + + +def find_library(): + """Find the sparrow-pycapsule shared library.""" + # First check environment variable + env_path = os.environ.get('SPARROW_PYCAPSULE_LIB_PATH') + if env_path: + lib_path = Path(env_path) + if lib_path.exists(): + return str(lib_path) + else: + raise FileNotFoundError( + f"SPARROW_PYCAPSULE_LIB_PATH points to non-existent file: {env_path}" + ) + + # Fallback: try to find the library in the build directory + build_dir = Path(__file__).parent.parent / "build" / "bin" + + # Check different build types and platforms + possible_paths = [ + build_dir / "Debug" / "libsparrow-pycapsule.so", + build_dir / "Release" / "libsparrow-pycapsule.so", + build_dir / "Debug" / "libsparrow-pycapsule.dylib", + build_dir / "Release" / "libsparrow-pycapsule.dylib", + build_dir / "Debug" / "sparrow-pycapsule.dll", + build_dir / "Release" / "sparrow-pycapsule.dll", + ] + + for path in possible_paths: + if path.exists(): + return str(path) + + raise FileNotFoundError( + f"Could not find sparrow-pycapsule library. " + f"Set SPARROW_PYCAPSULE_LIB_PATH environment variable or build the project first. 
" + f"Searched in: {build_dir}" + ) + + +def load_test_helper_library(): + """Load the C++ test helper library.""" + # First, load sparrow-pycapsule to ensure it's available + main_lib_path = find_library() + ctypes.CDLL(main_lib_path) # Just load it, RTLD_GLOBAL is already set + + # Then load the test helper library + env_path = os.environ.get('TEST_POLARS_HELPER_LIB_PATH') + if env_path: + lib_path = Path(env_path) + if lib_path.exists(): + lib = ctypes.CDLL(str(lib_path)) + # Initialize Python in the C++ library + lib.init_python() + + # Set up function signatures for pointer-based API + lib.create_test_array_as_pointers.argtypes = [ + ctypes.POINTER(ctypes.c_void_p), + ctypes.POINTER(ctypes.c_void_p) + ] + lib.create_test_array_as_pointers.restype = ctypes.c_int + + lib.roundtrip_array_pointers.argtypes = [ + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.POINTER(ctypes.c_void_p), + ctypes.POINTER(ctypes.c_void_p) + ] + lib.roundtrip_array_pointers.restype = ctypes.c_int + + lib.verify_array_size_from_pointers.argtypes = [ + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_size_t + ] + lib.verify_array_size_from_pointers.restype = ctypes.c_int + + return lib + else: + raise FileNotFoundError( + f"TEST_POLARS_HELPER_LIB_PATH points to non-existent file: {env_path}" + ) + + raise FileNotFoundError( + "Could not find test_polars_helper library. " + "Set TEST_POLARS_HELPER_LIB_PATH environment variable or build the project first." + ) + + +def pointer_to_arrow_capsule(schema_ptr, array_ptr): + """ + Convert C pointers to Arrow-compatible PyCapsules. + + PyArrow is very particular about how capsules are structured. + We use ctypes to call PyArrow's C API directly with our pointers. 
+ """ + # Import the pointers directly using PyArrow's C Data Interface + # by creating a temporary Python object that exposes __arrow_c_array__ + + class ArrowCArrayHolder: + def __init__(self, schema_ptr, array_ptr): + self.schema_ptr = schema_ptr + self.array_ptr = array_ptr + + def __arrow_c_array__(self, requested_schema=None): # noqa: ARG001 + """Return schema and array capsules.""" + # Note: requested_schema is part of the Arrow C Data Interface protocol + from ctypes import pythonapi, py_object, c_void_p, c_char_p + + # PyCapsule_New(void *pointer, const char *name, PyCapsule_Destructor destructor) + pythonapi.PyCapsule_New.restype = py_object + pythonapi.PyCapsule_New.argtypes = [c_void_p, c_char_p, c_void_p] + + schema_capsule = pythonapi.PyCapsule_New( + self.schema_ptr, + b"arrow_schema", + None + ) + + array_capsule = pythonapi.PyCapsule_New( + self.array_ptr, + b"arrow_array", + None + ) + + return (schema_capsule, array_capsule) + + holder = ArrowCArrayHolder(schema_ptr, array_ptr) + return holder.__arrow_c_array__() + + +def capsule_to_pointer(capsule, name): + """Extract the C pointer from a PyCapsule.""" + from ctypes import pythonapi, py_object, c_void_p, c_char_p + + # void* PyCapsule_GetPointer(PyObject *capsule, const char *name) + pythonapi.PyCapsule_GetPointer.restype = c_void_p + pythonapi.PyCapsule_GetPointer.argtypes = [py_object, c_char_p] + + name_bytes = name.encode('utf-8') if name else None + ptr = pythonapi.PyCapsule_GetPointer(capsule, name_bytes) + return ptr + + +def test_create_array_in_cpp(): + """Test creating an array in C++ and importing to Python/Polars.""" + print("\n" + "=" * 70) + print("Test 1: C++ → Python (Create array in C++, import to Polars)") + print("=" * 70) + + try: + # Load the C++ helper library + print("\n1. Loading C++ helper library...") + lib = load_test_helper_library() + + # Create test array in C++ (get raw pointers) + print("\n2. 
Creating test array in C++ (sparrow)...") + schema_ptr = ctypes.c_void_p() + array_ptr = ctypes.c_void_p() + + result = lib.create_test_array_as_pointers( + ctypes.byref(schema_ptr), + ctypes.byref(array_ptr) + ) + + if result != 0: + print(" ✗ Failed to create array in C++") + return False + + if not schema_ptr.value or not array_ptr.value: + print(" ✗ Received null pointers from C++") + return False + + print(f" ✓ Array created (schema_ptr={hex(schema_ptr.value)}, array_ptr={hex(array_ptr.value)})") + + print("\n3. Converting C pointers to PyCapsules in Python...") + schema_capsule, array_capsule = pointer_to_arrow_capsule(schema_ptr.value, array_ptr.value) + print(" ✓ PyCapsules created in Python") + + print("\n4. Importing to PyArrow...") + arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) + print(f" ✓ Arrow type: {arrow_array.type}") + print(f" ✓ Arrow values: {arrow_array.to_pylist()}") + + # Convert to Polars + print("\n5. Converting to Polars...") + polars_series = pl.from_arrow(arrow_array) + print(f" ✓ Polars series: {polars_series.to_list()}") + + # Verify expected values + expected = [10, 20, None, 40, 50] + actual = polars_series.to_list() + + if expected == actual: + print(" ✓ Data matches expected values!") + print("\n" + "=" * 70) + print("✓ Test 1 PASSED") + print("=" * 70) + return True + else: + print(f" ✗ Data mismatch!") + print(f" Expected: {expected}") + print(f" Actual: {actual}") + return False + + except Exception as e: + print(f"\n✗ Test 1 FAILED with exception: {e}") + import traceback + traceback.print_exc() + return False + + +def test_polars_to_cpp(): + """Test exporting Polars data to C++.""" + print("\n" + "=" * 70) + print("Test 2: Python → C++ (Export Polars to C++)") + print("=" * 70) + + try: + lib = load_test_helper_library() + + # Create a Polars series + print("\n1. 
Creating Polars series...") + test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) + print(f" Polars series: {test_series.to_list()}") + + # Export to Arrow and then to capsules + print("\n2. Exporting to Arrow C Data Interface...") + arrow_array = test_series.to_arrow() + schema_capsule, array_capsule = arrow_array.__arrow_c_array__() + print(" ✓ Capsules created") + + # Extract pointers from capsules + print("\n3. Extracting raw pointers from capsules...") + schema_ptr = capsule_to_pointer(schema_capsule, "arrow_schema") + array_ptr = capsule_to_pointer(array_capsule, "arrow_array") + print(f" ✓ Pointers extracted (schema={hex(schema_ptr)}, array={hex(array_ptr)})") + + # Verify in C++ + print("\n4. Verifying in C++ (sparrow)...") + result = lib.verify_array_size_from_pointers(schema_ptr, array_ptr, 5) + + if result == 0: + print(" ✓ C++ successfully imported and verified the array!") + print("\n" + "=" * 70) + print("✓ Test 2 PASSED") + print("=" * 70) + return True + else: + print(" ✗ C++ verification failed") + return False + + except Exception as e: + print(f"\n✗ Test 2 FAILED with exception: {e}") + import traceback + traceback.print_exc() + return False + + +def test_roundtrip(): + """Test round-trip: Python → C++ → Python.""" + print("\n" + "=" * 70) + print("Test 3: Round-trip (Python → C++ → Python)") + print("=" * 70) + + try: + lib = load_test_helper_library() + + # Create a Polars series + print("\n1. Creating Polars series...") + original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) + print(f" Original: {original_series.to_list()}") + + # Export to capsules + print("\n2. 
Exporting to Arrow C Data Interface...") + arrow_array = original_series.to_arrow() + schema_capsule_in, array_capsule_in = arrow_array.__arrow_c_array__() + + # Extract pointers + schema_ptr_in = capsule_to_pointer(schema_capsule_in, "arrow_schema") + array_ptr_in = capsule_to_pointer(array_capsule_in, "arrow_array") + + # Round-trip through C++ + print("\n3. Round-tripping through C++...") + schema_ptr_out = ctypes.c_void_p() + array_ptr_out = ctypes.c_void_p() + + result = lib.roundtrip_array_pointers( + schema_ptr_in, + array_ptr_in, + ctypes.byref(schema_ptr_out), + ctypes.byref(array_ptr_out) + ) + + if result != 0: + print(" ✗ Round-trip failed in C++") + return False + + if not schema_ptr_out.value or not array_ptr_out.value: + print(" ✗ Received null output pointers from C++") + return False + + print(" ✓ C++ processed the array") + + print("\n4. Converting output to capsules...") + schema_capsule_out, array_capsule_out = pointer_to_arrow_capsule(schema_ptr_out.value, array_ptr_out.value) + + print("\n5. 
Importing back to Python...") + arrow_array_out = pa.Array._import_from_c_capsule(schema_capsule_out, array_capsule_out) + result_series = pl.from_arrow(arrow_array_out) + print(f" Result: {result_series.to_list()}") + + if original_series.to_list() == result_series.to_list(): + print(" ✓ Round-trip successful - data matches!") + print("\n" + "=" * 70) + print("✓ Test 3 PASSED") + print("=" * 70) + return True + else: + print(" ✗ Data mismatch!") + print(f" Original: {original_series.to_list()}") + print(f" Result: {result_series.to_list()}") + return False + + except Exception as e: + print(f"\n✗ Test 3 FAILED with exception: {e}") + import traceback + traceback.print_exc() + return False + + +def main(): + """Run all integration tests.""" + print("\n") + print("╔" + "=" * 68 + "╗") + print("║" + " " * 68 + "║") + print("║" + "Sparrow-PyCapsule ↔ Polars Integration Tests".center(68) + "║") + print("║" + "(Pointer-based approach - no PyCapsule_New in C++)".center(68) + "║") + print("║" + " " * 68 + "║") + print("╚" + "=" * 68 + "╝") + + results = [] + + # Test 1: C++ → Python + results.append(("Test 1: C++ → Python", test_create_array_in_cpp())) + + # Test 2: Python → C++ + results.append(("Test 2: Python → C++", test_polars_to_cpp())) + + # Test 3: Round-trip + results.append(("Test 3: Round-trip", test_roundtrip())) + + # Summary + print("\n") + print("=" * 70) + print("TEST SUMMARY") + print("=" * 70) + + all_passed = True + for name, passed in results: + status = "✓ PASSED" if passed else "✗ FAILED" + print(f"{name}: {status}") + if not passed: + all_passed = False + + print("=" * 70) + + if all_passed: + print("\n🎉 All tests passed!") + return 0 + else: + print("\n❌ Some tests failed") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From dad57012d6ba0b7df5a9a0c95a437b5cfc6e6705 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 13:36:56 +0100 Subject: [PATCH 02/18] wip --- .github/workflows/linux.yml | 8 + 
.github/workflows/osx.yml | 8 + .github/workflows/windows.yml | 9 + .gitignore | 1 + test/test_polars_integration.py | 336 +++++++++++++------------------- 5 files changed, 159 insertions(+), 203 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 552bdf4..af68ab4 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -50,6 +50,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report + - name: Run Polars integration tests + working-directory: build + run: cmake --build . --target run_polars_tests_direct + - name: Install working-directory: build run: cmake --install . @@ -85,6 +89,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report + - name: Run Polars integration tests + working-directory: build + run: cmake --build . --target run_polars_tests_direct + - name: Install working-directory: build run: sudo cmake --install . diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 0935925..4a00f9e 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -55,6 +55,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report + - name: Run Polars integration tests + working-directory: build + run: cmake --build . --target run_polars_tests_direct + - name: Install working-directory: build run: cmake --install . @@ -95,6 +99,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report + - name: Run Polars integration tests + working-directory: build + run: cmake --build . --target run_polars_tests_direct + - name: Install working-directory: build run: sudo cmake --install . diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 575082a..4792d13 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -55,6 +55,10 @@ jobs: run: | cmake --build . 
--config ${{ matrix.build_type }} --target run_tests_with_junit_report + - name: Run Polars integration tests + working-directory: build + run: cmake --build . --config ${{ matrix.build_type }} --target run_polars_tests_direct + - name: Install working-directory: build run: cmake --install . --config ${{ matrix.build_type }} @@ -89,6 +93,11 @@ jobs: # working-directory: build # run: cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report + # TODO: Enable when main tests are fixed + #- name: Run Polars integration tests + # working-directory: build + # run: cmake --build . --config ${{ matrix.build_type }} --target run_polars_tests_direct + - name: Install working-directory: build run: cmake --install . --config ${{ matrix.build_type }} diff --git a/.gitignore b/.gitignore index 046c078..dc3bf39 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /build /.vscode +*.pyc diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index fdd8e39..fcbd507 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -11,6 +11,7 @@ import ctypes import os from pathlib import Path +import pytest import polars as pl import pyarrow as pa @@ -161,236 +162,165 @@ def capsule_to_pointer(capsule, name): return ptr -def test_create_array_in_cpp(): +@pytest.fixture(scope="module") +def cpp_lib(): + """Fixture to load the C++ helper library once for all tests.""" + return load_test_helper_library() + + +def test_create_array_in_cpp(cpp_lib): """Test creating an array in C++ and importing to Python/Polars.""" print("\n" + "=" * 70) print("Test 1: C++ → Python (Create array in C++, import to Polars)") print("=" * 70) - try: - # Load the C++ helper library - print("\n1. Loading C++ helper library...") - lib = load_test_helper_library() - - # Create test array in C++ (get raw pointers) - print("\n2. 
Creating test array in C++ (sparrow)...") - schema_ptr = ctypes.c_void_p() - array_ptr = ctypes.c_void_p() - - result = lib.create_test_array_as_pointers( - ctypes.byref(schema_ptr), - ctypes.byref(array_ptr) - ) - - if result != 0: - print(" ✗ Failed to create array in C++") - return False - - if not schema_ptr.value or not array_ptr.value: - print(" ✗ Received null pointers from C++") - return False - - print(f" ✓ Array created (schema_ptr={hex(schema_ptr.value)}, array_ptr={hex(array_ptr.value)})") - - print("\n3. Converting C pointers to PyCapsules in Python...") - schema_capsule, array_capsule = pointer_to_arrow_capsule(schema_ptr.value, array_ptr.value) - print(" ✓ PyCapsules created in Python") - - print("\n4. Importing to PyArrow...") - arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) - print(f" ✓ Arrow type: {arrow_array.type}") - print(f" ✓ Arrow values: {arrow_array.to_pylist()}") - - # Convert to Polars - print("\n5. Converting to Polars...") - polars_series = pl.from_arrow(arrow_array) - print(f" ✓ Polars series: {polars_series.to_list()}") - - # Verify expected values - expected = [10, 20, None, 40, 50] - actual = polars_series.to_list() - - if expected == actual: - print(" ✓ Data matches expected values!") - print("\n" + "=" * 70) - print("✓ Test 1 PASSED") - print("=" * 70) - return True - else: - print(f" ✗ Data mismatch!") - print(f" Expected: {expected}") - print(f" Actual: {actual}") - return False - - except Exception as e: - print(f"\n✗ Test 1 FAILED with exception: {e}") - import traceback - traceback.print_exc() - return False + lib = cpp_lib + + # Create test array in C++ (get raw pointers) + print("\n1. 
Creating test array in C++ (sparrow)...") + schema_ptr = ctypes.c_void_p() + array_ptr = ctypes.c_void_p() + + result = lib.create_test_array_as_pointers( + ctypes.byref(schema_ptr), + ctypes.byref(array_ptr) + ) + + assert result == 0, "Failed to create array in C++" + assert schema_ptr.value is not None, "Received null schema pointer from C++" + assert array_ptr.value is not None, "Received null array pointer from C++" + + print(f" ✓ Array created (schema_ptr={hex(schema_ptr.value)}, array_ptr={hex(array_ptr.value)})") + + print("\n2. Converting C pointers to PyCapsules in Python...") + schema_capsule, array_capsule = pointer_to_arrow_capsule(schema_ptr.value, array_ptr.value) + print(" ✓ PyCapsules created in Python") + + print("\n3. Importing to PyArrow...") + arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) + print(f" ✓ Arrow type: {arrow_array.type}") + print(f" ✓ Arrow values: {arrow_array.to_pylist()}") + + # Convert to Polars + print("\n4. Converting to Polars...") + polars_series = pl.from_arrow(arrow_array) + print(f" ✓ Polars series: {polars_series.to_list()}") + + # Verify expected values + expected = [10, 20, None, 40, 50] + actual = polars_series.to_list() + + assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" + print(" ✓ Data matches expected values!") + print("\n" + "=" * 70) + print("✓ Test 1 PASSED") + print("=" * 70) -def test_polars_to_cpp(): +def test_polars_to_cpp(cpp_lib): """Test exporting Polars data to C++.""" print("\n" + "=" * 70) print("Test 2: Python → C++ (Export Polars to C++)") print("=" * 70) - try: - lib = load_test_helper_library() - - # Create a Polars series - print("\n1. Creating Polars series...") - test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) - print(f" Polars series: {test_series.to_list()}") - - # Export to Arrow and then to capsules - print("\n2. 
Exporting to Arrow C Data Interface...") - arrow_array = test_series.to_arrow() - schema_capsule, array_capsule = arrow_array.__arrow_c_array__() - print(" ✓ Capsules created") - - # Extract pointers from capsules - print("\n3. Extracting raw pointers from capsules...") - schema_ptr = capsule_to_pointer(schema_capsule, "arrow_schema") - array_ptr = capsule_to_pointer(array_capsule, "arrow_array") - print(f" ✓ Pointers extracted (schema={hex(schema_ptr)}, array={hex(array_ptr)})") - - # Verify in C++ - print("\n4. Verifying in C++ (sparrow)...") - result = lib.verify_array_size_from_pointers(schema_ptr, array_ptr, 5) - - if result == 0: - print(" ✓ C++ successfully imported and verified the array!") - print("\n" + "=" * 70) - print("✓ Test 2 PASSED") - print("=" * 70) - return True - else: - print(" ✗ C++ verification failed") - return False - - except Exception as e: - print(f"\n✗ Test 2 FAILED with exception: {e}") - import traceback - traceback.print_exc() - return False + lib = cpp_lib + + # Create a Polars series + print("\n1. Creating Polars series...") + test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) + print(f" Polars series: {test_series.to_list()}") + + # Export to Arrow and then to capsules + print("\n2. Exporting to Arrow C Data Interface...") + arrow_array = test_series.to_arrow() + schema_capsule, array_capsule = arrow_array.__arrow_c_array__() + print(" ✓ Capsules created") + + # Extract pointers from capsules + print("\n3. Extracting raw pointers from capsules...") + schema_ptr = capsule_to_pointer(schema_capsule, "arrow_schema") + array_ptr = capsule_to_pointer(array_capsule, "arrow_array") + print(f" ✓ Pointers extracted (schema={hex(schema_ptr)}, array={hex(array_ptr)})") + + # Verify in C++ + print("\n4. 
Verifying in C++ (sparrow)...") + result = lib.verify_array_size_from_pointers(schema_ptr, array_ptr, 5) + + assert result == 0, "C++ verification failed" + print(" ✓ C++ successfully imported and verified the array!") + print("\n" + "=" * 70) + print("✓ Test 2 PASSED") + print("=" * 70) -def test_roundtrip(): +def test_roundtrip(cpp_lib): """Test round-trip: Python → C++ → Python.""" print("\n" + "=" * 70) print("Test 3: Round-trip (Python → C++ → Python)") print("=" * 70) - try: - lib = load_test_helper_library() - - # Create a Polars series - print("\n1. Creating Polars series...") - original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) - print(f" Original: {original_series.to_list()}") - - # Export to capsules - print("\n2. Exporting to Arrow C Data Interface...") - arrow_array = original_series.to_arrow() - schema_capsule_in, array_capsule_in = arrow_array.__arrow_c_array__() - - # Extract pointers - schema_ptr_in = capsule_to_pointer(schema_capsule_in, "arrow_schema") - array_ptr_in = capsule_to_pointer(array_capsule_in, "arrow_array") - - # Round-trip through C++ - print("\n3. Round-tripping through C++...") - schema_ptr_out = ctypes.c_void_p() - array_ptr_out = ctypes.c_void_p() - - result = lib.roundtrip_array_pointers( - schema_ptr_in, - array_ptr_in, - ctypes.byref(schema_ptr_out), - ctypes.byref(array_ptr_out) - ) - - if result != 0: - print(" ✗ Round-trip failed in C++") - return False - - if not schema_ptr_out.value or not array_ptr_out.value: - print(" ✗ Received null output pointers from C++") - return False - - print(" ✓ C++ processed the array") - - print("\n4. Converting output to capsules...") - schema_capsule_out, array_capsule_out = pointer_to_arrow_capsule(schema_ptr_out.value, array_ptr_out.value) - - print("\n5. 
Importing back to Python...") - arrow_array_out = pa.Array._import_from_c_capsule(schema_capsule_out, array_capsule_out) - result_series = pl.from_arrow(arrow_array_out) - print(f" Result: {result_series.to_list()}") - - if original_series.to_list() == result_series.to_list(): - print(" ✓ Round-trip successful - data matches!") - print("\n" + "=" * 70) - print("✓ Test 3 PASSED") - print("=" * 70) - return True - else: - print(" ✗ Data mismatch!") - print(f" Original: {original_series.to_list()}") - print(f" Result: {result_series.to_list()}") - return False - - except Exception as e: - print(f"\n✗ Test 3 FAILED with exception: {e}") - import traceback - traceback.print_exc() - return False - - -def main(): - """Run all integration tests.""" - print("\n") - print("╔" + "=" * 68 + "╗") - print("║" + " " * 68 + "║") - print("║" + "Sparrow-PyCapsule ↔ Polars Integration Tests".center(68) + "║") - print("║" + "(Pointer-based approach - no PyCapsule_New in C++)".center(68) + "║") - print("║" + " " * 68 + "║") - print("╚" + "=" * 68 + "╝") + lib = cpp_lib - results = [] + # Create a Polars series + print("\n1. Creating Polars series...") + original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) + print(f" Original: {original_series.to_list()}") - # Test 1: C++ → Python - results.append(("Test 1: C++ → Python", test_create_array_in_cpp())) + # Export to capsules + print("\n2. Exporting to Arrow C Data Interface...") + arrow_array = original_series.to_arrow() + schema_capsule_in, array_capsule_in = arrow_array.__arrow_c_array__() - # Test 2: Python → C++ - results.append(("Test 2: Python → C++", test_polars_to_cpp())) + # Extract pointers + schema_ptr_in = capsule_to_pointer(schema_capsule_in, "arrow_schema") + array_ptr_in = capsule_to_pointer(array_capsule_in, "arrow_array") - # Test 3: Round-trip - results.append(("Test 3: Round-trip", test_roundtrip())) + # Round-trip through C++ + print("\n3. 
Round-tripping through C++...") + schema_ptr_out = ctypes.c_void_p() + array_ptr_out = ctypes.c_void_p() - # Summary - print("\n") - print("=" * 70) - print("TEST SUMMARY") - print("=" * 70) + result = lib.roundtrip_array_pointers( + schema_ptr_in, + array_ptr_in, + ctypes.byref(schema_ptr_out), + ctypes.byref(array_ptr_out) + ) - all_passed = True - for name, passed in results: - status = "✓ PASSED" if passed else "✗ FAILED" - print(f"{name}: {status}") - if not passed: - all_passed = False + assert result == 0, "Round-trip failed in C++" + assert schema_ptr_out.value is not None, "Received null schema output pointer from C++" + assert array_ptr_out.value is not None, "Received null array output pointer from C++" - print("=" * 70) + print(" ✓ C++ processed the array") + + print("\n4. Converting output to capsules...") + schema_capsule_out, array_capsule_out = pointer_to_arrow_capsule(schema_ptr_out.value, array_ptr_out.value) + + print("\n5. Importing back to Python...") + arrow_array_out = pa.Array._import_from_c_capsule(schema_capsule_out, array_capsule_out) + result_series = pl.from_arrow(arrow_array_out) + print(f" Result: {result_series.to_list()}") + + original_data = original_series.to_list() + result_data = result_series.to_list() + assert original_data == result_data, f"Data mismatch! 
Original: {original_data}, Result: {result_data}" - if all_passed: - print("\n🎉 All tests passed!") - return 0 - else: - print("\n❌ Some tests failed") - return 1 + print(" ✓ Round-trip successful - data matches!") + print("\n" + "=" * 70) + print("✓ Test 3 PASSED") + print("=" * 70) if __name__ == "__main__": - sys.exit(main()) + """Run tests with pytest when executed directly.""" + print("\n") + print("╔" + "=" * 68 + "╗") + print("║" + " " * 68 + "║") + print("║" + "Sparrow-PyCapsule ↔ Polars Integration Tests".center(68) + "║") + print("║" + "(Pointer-based approach - no PyCapsule_New in C++)".center(68) + "║") + print("║" + " " * 68 + "║") + print("╚" + "=" * 68 + "╝") + print("\nRunning tests with pytest...\n") + + # Run pytest on this file + sys.exit(pytest.main([__file__, "-v", "-s"])) From 104fedda914d099cf6192d76ccd35b05ecf35013 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 13:44:24 +0100 Subject: [PATCH 03/18] fix --- environment-dev.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment-dev.yml b/environment-dev.yml index 8d38ea5..ea07591 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -10,6 +10,9 @@ dependencies: - python # Tests - doctest + - polars + - pyarrow + - pytest # Documentation - doxygen - graphviz From 103a6ecdf96d54aba12e92439ee6ff6cce22727a Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 13:53:23 +0100 Subject: [PATCH 04/18] fix --- .github/workflows/linux.yml | 5 +---- .github/workflows/osx.yml | 5 +---- .github/workflows/windows.yml | 6 +----- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index af68ab4..6db0537 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -51,6 +51,7 @@ jobs: run: cmake --build . --target run_tests_with_junit_report - name: Run Polars integration tests + if: matrix.build_shared == 'ON' working-directory: build run: cmake --build . 
--target run_polars_tests_direct @@ -89,10 +90,6 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report - - name: Run Polars integration tests - working-directory: build - run: cmake --build . --target run_polars_tests_direct - - name: Install working-directory: build run: sudo cmake --install . diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 4a00f9e..7a3dbb9 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -56,6 +56,7 @@ jobs: run: cmake --build . --target run_tests_with_junit_report - name: Run Polars integration tests + if: matrix.build_shared == 'ON' working-directory: build run: cmake --build . --target run_polars_tests_direct @@ -99,10 +100,6 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report - - name: Run Polars integration tests - working-directory: build - run: cmake --build . --target run_polars_tests_direct - - name: Install working-directory: build run: sudo cmake --install . diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 4792d13..fef6f0a 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -56,6 +56,7 @@ jobs: cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report - name: Run Polars integration tests + if: matrix.build_shared == 'ON' working-directory: build run: cmake --build . --config ${{ matrix.build_type }} --target run_polars_tests_direct @@ -93,11 +94,6 @@ jobs: # working-directory: build # run: cmake --build . --config ${{ matrix.build_type }} --target run_tests_with_junit_report - # TODO: Enable when main tests are fixed - #- name: Run Polars integration tests - # working-directory: build - # run: cmake --build . --config ${{ matrix.build_type }} --target run_polars_tests_direct - - name: Install working-directory: build run: cmake --install . 
--config ${{ matrix.build_type }} From 70965fbfce581a4e38e0cd586187657eafeccfc3 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 14:13:58 +0100 Subject: [PATCH 05/18] wip --- README.md | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/README.md b/README.md index c857321..0da23c8 100644 --- a/README.md +++ b/README.md @@ -135,22 +135,12 @@ cd build Test bidirectional data exchange with Polars: ```bash -# Run via CMake target (recommended) -cd build -cmake --build . --target run_polars_tests # Or with direct execution (better output) cmake --build . --target run_polars_tests_direct -# Or via CTest -ctest -R test_polars_integration --output-on-failure - # Check dependencies first cmake --build . --target check_polars_deps - -# Or run Python script directly -cd test -python test_polars_integration.py ``` See [test/README_POLARS_TESTS.md](test/README_POLARS_TESTS.md) for detailed documentation. @@ -163,27 +153,18 @@ The project provides several convenient CMake targets for testing: |--------|-------------| | `run_tests` | Run all C++ unit tests | | `run_tests_with_junit_report` | Run C++ tests with JUnit XML output | -| `run_polars_tests` | Run Polars integration test via CTest | | `run_polars_tests_direct` | Run Polars test directly (recommended, better output) | | `check_polars_deps` | Check Python dependencies (polars, pyarrow) | -| `debug_polars_tests` | Run Polars tests with verbose debugging output | -| `test_library_load` | Minimal test to debug library loading issues | **Usage:** ```bash cd build -# Run C++ tests -cmake --build . --target run_tests - # Run Polars integration tests cmake --build . --target run_polars_tests_direct # Check dependencies first cmake --build . --target check_polars_deps - -# Or use CTest to run all tests -ctest --output-on-failure ``` ### Debugging Test Failures @@ -196,9 +177,6 @@ cd build # Run minimal library loading test (step-by-step debugging) cmake --build . 
--target test_library_load -# Run with full debugging output -cmake --build . --target debug_polars_tests - # Check that libraries exist and dependencies are correct cmake --build . --target check_polars_deps ``` From b522e29c78ba763deb32f0bee1fe576176db367b Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 14:15:43 +0100 Subject: [PATCH 06/18] fix --- test/test_polars_integration.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index fcbd507..9cb1904 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -312,15 +312,6 @@ def test_roundtrip(cpp_lib): if __name__ == "__main__": - """Run tests with pytest when executed directly.""" - print("\n") - print("╔" + "=" * 68 + "╗") - print("║" + " " * 68 + "║") - print("║" + "Sparrow-PyCapsule ↔ Polars Integration Tests".center(68) + "║") - print("║" + "(Pointer-based approach - no PyCapsule_New in C++)".center(68) + "║") - print("║" + " " * 68 + "║") - print("╚" + "=" * 68 + "╝") - print("\nRunning tests with pytest...\n") - + """Run tests with pytest when executed directly.""" # Run pytest on this file sys.exit(pytest.main([__file__, "-v", "-s"])) From b1f6be31b2e92e12d7d506a3a7cde0dcd5acd320 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 14:22:40 +0100 Subject: [PATCH 07/18] Fix windows --- test/test_polars_helper.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_polars_helper.cpp b/test/test_polars_helper.cpp index 7a08009..ca656cd 100644 --- a/test/test_polars_helper.cpp +++ b/test/test_polars_helper.cpp @@ -16,6 +16,7 @@ #include #include #include +#include // Export C API functions for ctypes extern "C" @@ -26,7 +27,7 @@ extern "C" * Note: When called from Python (via ctypes), Python is already initialized. * This function only initializes if called from pure C++ context. 
*/ - void init_python() + SPARROW_PYCAPSULE_API void init_python() { // When called from Python via ctypes, Python is already initialized // So this check should always be true, and we do nothing @@ -50,7 +51,7 @@ extern "C" * @param array_ptr_out Output parameter for ArrowArray pointer * @return 0 on success, -1 on error */ - int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out) + SPARROW_PYCAPSULE_API int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out) { try { @@ -94,7 +95,7 @@ extern "C" * @param array_ptr_out Output ArrowArray pointer * @return 0 on success, -1 on error */ - int + SPARROW_PYCAPSULE_API int roundtrip_array_pointers(void* schema_ptr_in, void* array_ptr_in, void** schema_ptr_out, void** array_ptr_out) { try @@ -145,7 +146,7 @@ extern "C" * @param expected_size Expected array size * @return 0 if size matches, -1 otherwise */ - int verify_array_size_from_pointers(void* schema_ptr, void* array_ptr, size_t expected_size) + SPARROW_PYCAPSULE_API int verify_array_size_from_pointers(void* schema_ptr, void* array_ptr, size_t expected_size) { try { From d0462e043e52d401f1002b25cb893be7be82d03f Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 14:32:27 +0100 Subject: [PATCH 08/18] fix win compilation --- test/CMakeLists.txt | 2 +- test/test_polars_helper.cpp | 12 ++++--- test/test_polars_helper.hpp | 68 +++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 test/test_polars_helper.hpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9cfb481..1ad88d7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -87,7 +87,7 @@ set_target_properties(run_tests_with_junit_report PROPERTIES FOLDER "Tests utili # Polars integration test helper library # ======================================= -add_library(test_polars_helper SHARED test_polars_helper.cpp) +add_library(test_polars_helper SHARED test_polars_helper.cpp 
test_polars_helper.hpp) target_link_libraries(test_polars_helper PUBLIC diff --git a/test/test_polars_helper.cpp b/test/test_polars_helper.cpp index ca656cd..2e426d9 100644 --- a/test/test_polars_helper.cpp +++ b/test/test_polars_helper.cpp @@ -6,17 +6,19 @@ * to test the bidirectional data exchange between Polars and sparrow. */ +#include "test_polars_helper.hpp" + #include #include #include #include +#include #include #include #include #include -#include // Export C API functions for ctypes extern "C" @@ -27,7 +29,7 @@ extern "C" * Note: When called from Python (via ctypes), Python is already initialized. * This function only initializes if called from pure C++ context. */ - SPARROW_PYCAPSULE_API void init_python() + void init_python() { // When called from Python via ctypes, Python is already initialized // So this check should always be true, and we do nothing @@ -51,7 +53,7 @@ extern "C" * @param array_ptr_out Output parameter for ArrowArray pointer * @return 0 on success, -1 on error */ - SPARROW_PYCAPSULE_API int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out) + int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out) { try { @@ -95,7 +97,7 @@ extern "C" * @param array_ptr_out Output ArrowArray pointer * @return 0 on success, -1 on error */ - SPARROW_PYCAPSULE_API int + int roundtrip_array_pointers(void* schema_ptr_in, void* array_ptr_in, void** schema_ptr_out, void** array_ptr_out) { try @@ -146,7 +148,7 @@ extern "C" * @param expected_size Expected array size * @return 0 if size matches, -1 otherwise */ - SPARROW_PYCAPSULE_API int verify_array_size_from_pointers(void* schema_ptr, void* array_ptr, size_t expected_size) + int verify_array_size_from_pointers(void* schema_ptr, void* array_ptr, size_t expected_size) { try { diff --git a/test/test_polars_helper.hpp b/test/test_polars_helper.hpp new file mode 100644 index 0000000..593d278 --- /dev/null +++ b/test/test_polars_helper.hpp @@ -0,0 +1,68 @@ +/** + * 
@file test_polars_helper.hpp + * @brief C++ helper library declarations for Polars integration tests. + * + * This header declares C functions that can be called from Python via ctypes + * to test the bidirectional data exchange between Polars and sparrow. + */ + +#ifndef SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP +#define SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP + +#include +#include + +extern "C" +{ + /** + * @brief Initialize Python interpreter if not already initialized. + * + * Note: When called from Python (via ctypes), Python is already initialized. + * This function only initializes if called from pure C++ context. + */ + SPARROW_PYCAPSULE_API void init_python(); + + /** + * @brief Create a simple test array and return raw Arrow C pointers. + * + * Instead of creating PyCapsules in C++, we return raw pointers that Python + * will wrap in PyCapsules. This avoids Python C API calls from ctypes libraries. + * + * @param schema_ptr_out Output parameter for ArrowSchema pointer + * @param array_ptr_out Output parameter for ArrowArray pointer + * @return 0 on success, -1 on error + */ + SPARROW_PYCAPSULE_API int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out); + + /** + * @brief Import array from raw Arrow C pointers and return new pointers. + * + * @param schema_ptr_in Input ArrowSchema pointer + * @param array_ptr_in Input ArrowArray pointer + * @param schema_ptr_out Output ArrowSchema pointer + * @param array_ptr_out Output ArrowArray pointer + * @return 0 on success, -1 on error + */ + SPARROW_PYCAPSULE_API int roundtrip_array_pointers( + void* schema_ptr_in, + void* array_ptr_in, + void** schema_ptr_out, + void** array_ptr_out + ); + + /** + * @brief Verify that Arrow C structures have the expected size. 
+ * + * @param schema_ptr ArrowSchema pointer + * @param array_ptr ArrowArray pointer + * @param expected_size Expected array size + * @return 0 if size matches, -1 otherwise + */ + SPARROW_PYCAPSULE_API int verify_array_size_from_pointers( + void* schema_ptr, + void* array_ptr, + size_t expected_size + ); +} + +#endif // SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP From f8641cc471cce9ed6344303186d8687d87ae5743 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 14:37:23 +0100 Subject: [PATCH 09/18] fix --- test/test_polars_integration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index 9cb1904..1c28829 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -171,7 +171,7 @@ def cpp_lib(): def test_create_array_in_cpp(cpp_lib): """Test creating an array in C++ and importing to Python/Polars.""" print("\n" + "=" * 70) - print("Test 1: C++ → Python (Create array in C++, import to Polars)") + print("Test 1: C++ -> Python (Create array in C++, import to Polars)") print("=" * 70) lib = cpp_lib @@ -220,7 +220,7 @@ def test_create_array_in_cpp(cpp_lib): def test_polars_to_cpp(cpp_lib): """Test exporting Polars data to C++.""" print("\n" + "=" * 70) - print("Test 2: Python → C++ (Export Polars to C++)") + print("Test 2: Python -> C++ (Export Polars to C++)") print("=" * 70) lib = cpp_lib @@ -254,9 +254,9 @@ def test_polars_to_cpp(cpp_lib): def test_roundtrip(cpp_lib): - """Test round-trip: Python → C++ → Python.""" + """Test round-trip: Python -> C++ -> Python.""" print("\n" + "=" * 70) - print("Test 3: Round-trip (Python → C++ → Python)") + print("Test 3: Round-trip (Python -> C++ -> Python)") print("=" * 70) lib = cpp_lib From 57211c04ff33e2a366d68b7b933779889fa698e5 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 21 Nov 2025 14:44:50 +0100 Subject: [PATCH 10/18] fix --- test/test_polars_integration.py | 28 
++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index 1c28829..12a5c00 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -190,30 +190,30 @@ def test_create_array_in_cpp(cpp_lib): assert schema_ptr.value is not None, "Received null schema pointer from C++" assert array_ptr.value is not None, "Received null array pointer from C++" - print(f" ✓ Array created (schema_ptr={hex(schema_ptr.value)}, array_ptr={hex(array_ptr.value)})") + print(f" Array created (schema_ptr={hex(schema_ptr.value)}, array_ptr={hex(array_ptr.value)})") print("\n2. Converting C pointers to PyCapsules in Python...") schema_capsule, array_capsule = pointer_to_arrow_capsule(schema_ptr.value, array_ptr.value) - print(" ✓ PyCapsules created in Python") + print(" PyCapsules created in Python") print("\n3. Importing to PyArrow...") arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) - print(f" ✓ Arrow type: {arrow_array.type}") - print(f" ✓ Arrow values: {arrow_array.to_pylist()}") + print(f" Arrow type: {arrow_array.type}") + print(f" Arrow values: {arrow_array.to_pylist()}") # Convert to Polars print("\n4. Converting to Polars...") polars_series = pl.from_arrow(arrow_array) - print(f" ✓ Polars series: {polars_series.to_list()}") + print(f" Polars series: {polars_series.to_list()}") # Verify expected values expected = [10, 20, None, 40, 50] actual = polars_series.to_list() assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" - print(" ✓ Data matches expected values!") + print(" Data matches expected values!") print("\n" + "=" * 70) - print("✓ Test 1 PASSED") + print("Test 1 PASSED") print("=" * 70) @@ -234,22 +234,22 @@ def test_polars_to_cpp(cpp_lib): print("\n2. 
Exporting to Arrow C Data Interface...") arrow_array = test_series.to_arrow() schema_capsule, array_capsule = arrow_array.__arrow_c_array__() - print(" ✓ Capsules created") + print(" Capsules created") # Extract pointers from capsules print("\n3. Extracting raw pointers from capsules...") schema_ptr = capsule_to_pointer(schema_capsule, "arrow_schema") array_ptr = capsule_to_pointer(array_capsule, "arrow_array") - print(f" ✓ Pointers extracted (schema={hex(schema_ptr)}, array={hex(array_ptr)})") + print(f" Pointers extracted (schema={hex(schema_ptr)}, array={hex(array_ptr)})") # Verify in C++ print("\n4. Verifying in C++ (sparrow)...") result = lib.verify_array_size_from_pointers(schema_ptr, array_ptr, 5) assert result == 0, "C++ verification failed" - print(" ✓ C++ successfully imported and verified the array!") + print(" C++ successfully imported and verified the array!") print("\n" + "=" * 70) - print("✓ Test 2 PASSED") + print("Test 2 PASSED") print("=" * 70) @@ -291,7 +291,7 @@ def test_roundtrip(cpp_lib): assert schema_ptr_out.value is not None, "Received null schema output pointer from C++" assert array_ptr_out.value is not None, "Received null array output pointer from C++" - print(" ✓ C++ processed the array") + print(" C++ processed the array") print("\n4. Converting output to capsules...") schema_capsule_out, array_capsule_out = pointer_to_arrow_capsule(schema_ptr_out.value, array_ptr_out.value) @@ -305,9 +305,9 @@ def test_roundtrip(cpp_lib): result_data = result_series.to_list() assert original_data == result_data, f"Data mismatch! 
Original: {original_data}, Result: {result_data}" - print(" ✓ Round-trip successful - data matches!") + print(" Round-trip successful - data matches!") print("\n" + "=" * 70) - print("✓ Test 3 PASSED") + print("Test 3 PASSED") print("=" * 70) From 293bf3ea0cd93bf5bcc1fbe00033eaf22420ed83 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Tue, 25 Nov 2025 20:19:11 +0100 Subject: [PATCH 11/18] wip --- test/CMakeLists.txt | 14 +- test/test_library_load.py | 26 +-- test/test_polars_helper.cpp | 152 +++++---------- test/test_polars_helper.hpp | 61 +++--- test/test_polars_helper_module.cpp | 203 ++++++++++++++++++++ test/test_polars_integration.py | 290 +++++++---------------------- 6 files changed, 357 insertions(+), 389 deletions(-) create mode 100644 test/test_polars_helper_module.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1ad88d7..ed875e1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -85,15 +85,17 @@ add_custom_target(run_tests_with_junit_report set_target_properties(run_tests_with_junit_report PROPERTIES FOLDER "Tests utilities") -# Polars integration test helper library -# ======================================= -add_library(test_polars_helper SHARED test_polars_helper.cpp test_polars_helper.hpp) +# Polars integration test helper - Native Python Extension Module +# =============================================================== +# This builds a proper Python extension module (.cpython-*.so) that shares +# the same Python runtime as the interpreter, avoiding dual-runtime issues. 
+ +Python_add_library(test_polars_helper MODULE test_polars_helper_module.cpp) target_link_libraries(test_polars_helper - PUBLIC + PRIVATE sparrow-pycapsule sparrow::sparrow - Python::Python ) target_compile_features(test_polars_helper PRIVATE cxx_std_20) @@ -107,6 +109,8 @@ endif() set_target_properties(test_polars_helper PROPERTIES FOLDER tests LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} + # Python modules must not have a debug suffix - Python won't find them + DEBUG_POSTFIX "" ) # Python integration test diff --git a/test/test_library_load.py b/test/test_library_load.py index 175bf2d..b577365 100644 --- a/test/test_library_load.py +++ b/test/test_library_load.py @@ -21,7 +21,6 @@ def main(): print("="*60) try: - # Step 1: Check environment variables step(1, "Checking environment variables") helper_path = os.environ.get('TEST_POLARS_HELPER_LIB_PATH') main_path = os.environ.get('SPARROW_PYCAPSULE_LIB_PATH') @@ -37,7 +36,6 @@ def main(): print("ERROR: SPARROW_PYCAPSULE_LIB_PATH not set") return 1 - # Step 2: Check files exist step(2, "Checking library files exist") helper_file = Path(helper_path) main_file = Path(main_path) @@ -53,12 +51,10 @@ def main(): print(f"ERROR: Main library not found at {main_file}") return 1 - # Step 3: Test ctypes import step(3, "Testing ctypes module") print("ctypes imported successfully") print(f"ctypes.CDLL: {ctypes.CDLL}") - # Step 4: Try loading main library step(4, "Loading sparrow-pycapsule library") try: main_lib = ctypes.CDLL(str(main_file)) @@ -66,8 +62,7 @@ def main(): except Exception as e: print(f"✗ Failed to load main library: {e}") return 1 - - # Step 5: Try loading helper library + step(5, "Loading test_polars_helper library") try: helper_lib = ctypes.CDLL(str(helper_file)) @@ -76,7 +71,6 @@ def main(): print(f"✗ Failed to load helper library: {e}") return 1 - # Step 6: Check if init_python exists step(6, "Checking for init_python function") try: if hasattr(helper_lib, 'init_python'): @@ -89,24 
+83,6 @@ def main(): print(f"✗ Error checking for init_python: {e}") return 1 - # Step 7: Call init_python - step(7, "Calling init_python()") - print("About to call init_python()...") - sys.stdout.flush() - - try: - helper_lib.init_python() - print("✓ init_python() called successfully") - except Exception as e: - print(f"✗ init_python() failed: {e}") - return 1 - - # Step 8: Check Python state - step(8, "Checking Python interpreter state") - import sys as sys2 - print(f"Python version: {sys2.version}") - print(f"Python initialized: {sys2.version_info}") - print("\n" + "="*60) print("✓ ALL STEPS COMPLETED SUCCESSFULLY") print("="*60) diff --git a/test/test_polars_helper.cpp b/test/test_polars_helper.cpp index 2e426d9..a9ad5b5 100644 --- a/test/test_polars_helper.cpp +++ b/test/test_polars_helper.cpp @@ -3,7 +3,8 @@ * @brief C++ helper library for Polars integration tests. * * This library provides C functions that can be called from Python via ctypes - * to test the bidirectional data exchange between Polars and sparrow. + * to test the bidirectional data exchange between Polars and sparrow using + * the sparrow::pycapsule interface. */ #include "test_polars_helper.hpp" @@ -12,52 +13,18 @@ #include #include -#include -#include -#include - #include #include #include -// Export C API functions for ctypes +#include + extern "C" { - /** - * @brief Initialize Python interpreter if not already initialized. - * - * Note: When called from Python (via ctypes), Python is already initialized. - * This function only initializes if called from pure C++ context. 
- */ - void init_python() - { - // When called from Python via ctypes, Python is already initialized - // So this check should always be true, and we do nothing - if (Py_IsInitialized()) - { - // Python already initialized - this is the normal case when called from Python - return; - } - - // Only initialize if we're being called from C++ without Python - Py_Initialize(); - } - - /** - * @brief Create a simple test array and return raw Arrow C pointers. - * - * Instead of creating PyCapsules in C++, we return raw pointers that Python - * will wrap in PyCapsules. This avoids Python C API calls from ctypes libraries. - * - * @param schema_ptr_out Output parameter for ArrowSchema pointer - * @param array_ptr_out Output parameter for ArrowArray pointer - * @return 0 on success, -1 on error - */ - int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out) + int create_test_array_capsules(PyObject** schema_capsule_out, PyObject** array_capsule_out) { try { - // Create a test array with nullable integers std::vector> values = { sparrow::make_nullable(10, true), sparrow::make_nullable(20, true), @@ -69,120 +36,101 @@ extern "C" sparrow::primitive_array prim_array(std::move(values)); sparrow::array arr(std::move(prim_array)); - // Extract Arrow C structures - auto [arrow_array, arrow_schema] = sparrow::extract_arrow_structures(std::move(arr)); + auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); - // Allocate on heap and transfer ownership to Python - ArrowSchema* schema_ptr = new ArrowSchema(std::move(arrow_schema)); - ArrowArray* array_ptr = new ArrowArray(std::move(arrow_array)); + if (schema_capsule == nullptr || array_capsule == nullptr) + { + std::cerr << "Failed to create PyCapsules\n"; + Py_XDECREF(schema_capsule); + Py_XDECREF(array_capsule); + return -1; + } - *schema_ptr_out = schema_ptr; - *array_ptr_out = array_ptr; + *schema_capsule_out = schema_capsule; + *array_capsule_out = array_capsule; return 0; 
} catch (const std::exception& e) { - std::cerr << "Exception in create_test_array_as_pointers: " << e.what() << std::endl; + std::cerr << "Exception in create_test_array_capsules: " << e.what() << '\n'; return -1; } } - /** - * @brief Import array from raw Arrow C pointers and return new pointers. - * - * @param schema_ptr_in Input ArrowSchema pointer - * @param array_ptr_in Input ArrowArray pointer - * @param schema_ptr_out Output ArrowSchema pointer - * @param array_ptr_out Output ArrowArray pointer - * @return 0 on success, -1 on error - */ - int - roundtrip_array_pointers(void* schema_ptr_in, void* array_ptr_in, void** schema_ptr_out, void** array_ptr_out) + int roundtrip_array_capsules( + PyObject* schema_capsule_in, + PyObject* array_capsule_in, + PyObject** schema_capsule_out, + PyObject** array_capsule_out + ) { try { - if (schema_ptr_in == nullptr || array_ptr_in == nullptr) + if (schema_capsule_in == nullptr || array_capsule_in == nullptr) { - std::cerr << "Null input pointers" << std::endl; + std::cerr << "Null input capsules\n"; return -1; } - ArrowSchema* schema_in = static_cast(schema_ptr_in); - ArrowArray* array_in = static_cast(array_ptr_in); - - // Move the data (mark originals as released to prevent double-free) - ArrowSchema schema_moved = *schema_in; - ArrowArray array_moved = *array_in; - schema_in->release = nullptr; - array_in->release = nullptr; - - // Import into sparrow - sparrow::array arr(std::move(array_moved), std::move(schema_moved)); + sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( + schema_capsule_in, + array_capsule_in + ); - std::cout << "Roundtrip array size: " << arr.size() << std::endl; + std::cout << "Roundtrip array size: " << arr.size() << '\n'; - // Export back out - auto [arrow_array_out, arrow_schema_out] = sparrow::extract_arrow_structures(std::move(arr)); + auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); - ArrowSchema* schema_out = new 
ArrowSchema(std::move(arrow_schema_out)); - ArrowArray* array_out = new ArrowArray(std::move(arrow_array_out)); + if (schema_capsule == nullptr || array_capsule == nullptr) + { + std::cerr << "Failed to create output PyCapsules\n"; + Py_XDECREF(schema_capsule); + Py_XDECREF(array_capsule); + return -1; + } - *schema_ptr_out = schema_out; - *array_ptr_out = array_out; + *schema_capsule_out = schema_capsule; + *array_capsule_out = array_capsule; return 0; } catch (const std::exception& e) { - std::cerr << "Exception in roundtrip_array_pointers: " << e.what() << std::endl; + std::cerr << "Exception in roundtrip_array_capsules: " << e.what() << '\n'; return -1; } } - /** - * @brief Verify that Arrow C structures have the expected size. - * - * @param schema_ptr ArrowSchema pointer - * @param array_ptr ArrowArray pointer - * @param expected_size Expected array size - * @return 0 if size matches, -1 otherwise - */ - int verify_array_size_from_pointers(void* schema_ptr, void* array_ptr, size_t expected_size) + int verify_array_size_from_capsules(PyObject* schema_capsule, PyObject* array_capsule, size_t expected_size) { try { - if (schema_ptr == nullptr || array_ptr == nullptr) + if (schema_capsule == nullptr || array_capsule == nullptr) { - std::cerr << "Null pointers provided" << std::endl; + std::cerr << "Null capsules provided\n"; return -1; } - ArrowSchema* schema = static_cast(schema_ptr); - ArrowArray* array = static_cast(array_ptr); - - // Move the data (mark originals as released) - ArrowSchema schema_moved = *schema; - ArrowArray array_moved = *array; - schema->release = nullptr; - array->release = nullptr; - - sparrow::array arr(std::move(array_moved), std::move(schema_moved)); + sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( + schema_capsule, + array_capsule + ); if (arr.size() == expected_size) { - std::cout << "Array size verified: " << arr.size() << std::endl; + std::cout << "Array size verified: " << arr.size() << '\n'; return 0; } 
else { - std::cerr << "Size mismatch: expected " << expected_size << ", got " << arr.size() << std::endl; + std::cerr << "Size mismatch: expected " << expected_size << ", got " << arr.size() << '\n'; return -1; } } catch (const std::exception& e) { - std::cerr << "Exception in verify_array_size_from_pointers: " << e.what() << std::endl; + std::cerr << "Exception in verify_array_size_from_capsules: " << e.what() << '\n'; return -1; } } diff --git a/test/test_polars_helper.hpp b/test/test_polars_helper.hpp index 593d278..51473a9 100644 --- a/test/test_polars_helper.hpp +++ b/test/test_polars_helper.hpp @@ -3,64 +3,63 @@ * @brief C++ helper library declarations for Polars integration tests. * * This header declares C functions that can be called from Python via ctypes - * to test the bidirectional data exchange between Polars and sparrow. + * to test the bidirectional data exchange between Polars and sparrow using + * the sparrow::pycapsule interface. */ #ifndef SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP #define SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP #include + +#include #include extern "C" { /** - * @brief Initialize Python interpreter if not already initialized. + * @brief Create a test array and return PyCapsules. * - * Note: When called from Python (via ctypes), Python is already initialized. - * This function only initializes if called from pure C++ context. - */ - SPARROW_PYCAPSULE_API void init_python(); - - /** - * @brief Create a simple test array and return raw Arrow C pointers. + * Uses sparrow::pycapsule::export_array_to_capsules() to create the capsules. * - * Instead of creating PyCapsules in C++, we return raw pointers that Python - * will wrap in PyCapsules. This avoids Python C API calls from ctypes libraries. 
- * - * @param schema_ptr_out Output parameter for ArrowSchema pointer - * @param array_ptr_out Output parameter for ArrowArray pointer + * @param schema_capsule_out Output parameter for schema PyCapsule + * @param array_capsule_out Output parameter for array PyCapsule * @return 0 on success, -1 on error */ - SPARROW_PYCAPSULE_API int create_test_array_as_pointers(void** schema_ptr_out, void** array_ptr_out); + SPARROW_PYCAPSULE_API int create_test_array_capsules(PyObject** schema_capsule_out, PyObject** array_capsule_out); /** - * @brief Import array from raw Arrow C pointers and return new pointers. + * @brief Import array from PyCapsules and return new PyCapsules. + * + * Uses sparrow::pycapsule::import_array_from_capsules() and + * sparrow::pycapsule::export_array_to_capsules(). * - * @param schema_ptr_in Input ArrowSchema pointer - * @param array_ptr_in Input ArrowArray pointer - * @param schema_ptr_out Output ArrowSchema pointer - * @param array_ptr_out Output ArrowArray pointer + * @param schema_capsule_in Input schema PyCapsule + * @param array_capsule_in Input array PyCapsule + * @param schema_capsule_out Output schema PyCapsule + * @param array_capsule_out Output array PyCapsule * @return 0 on success, -1 on error */ - SPARROW_PYCAPSULE_API int roundtrip_array_pointers( - void* schema_ptr_in, - void* array_ptr_in, - void** schema_ptr_out, - void** array_ptr_out + SPARROW_PYCAPSULE_API int roundtrip_array_capsules( + PyObject* schema_capsule_in, + PyObject* array_capsule_in, + PyObject** schema_capsule_out, + PyObject** array_capsule_out ); /** - * @brief Verify that Arrow C structures have the expected size. + * @brief Verify that array imported from PyCapsules has the expected size. + * + * Uses sparrow::pycapsule::import_array_from_capsules(). 
* - * @param schema_ptr ArrowSchema pointer - * @param array_ptr ArrowArray pointer + * @param schema_capsule Schema PyCapsule + * @param array_capsule Array PyCapsule * @param expected_size Expected array size * @return 0 if size matches, -1 otherwise */ - SPARROW_PYCAPSULE_API int verify_array_size_from_pointers( - void* schema_ptr, - void* array_ptr, + SPARROW_PYCAPSULE_API int verify_array_size_from_capsules( + PyObject* schema_capsule, + PyObject* array_capsule, size_t expected_size ); } diff --git a/test/test_polars_helper_module.cpp b/test/test_polars_helper_module.cpp new file mode 100644 index 0000000..5ab0e55 --- /dev/null +++ b/test/test_polars_helper_module.cpp @@ -0,0 +1,203 @@ +/** + * @file test_polars_helper_module.cpp + * @brief Native Python extension module for Polars integration tests. + * + * This is a native Python extension module (not pybind11/nanobind) that tests + * the sparrow::pycapsule interface. Being a proper extension module, it shares + * the same Python runtime as the interpreter, avoiding the dual-runtime issues + * that occur with ctypes. + */ + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include +#include + +#include +#include +#include + +#include + +/** + * Create a test array and return PyCapsules. 
+ * + * Python signature: create_test_array_capsules() -> tuple[capsule, capsule] + */ +static PyObject* py_create_test_array_capsules(PyObject* self, PyObject* args) +{ + (void)self; + (void)args; + + try + { + // Create a test array with nullable integers + std::vector> values = { + sparrow::make_nullable(10, true), + sparrow::make_nullable(20, true), + sparrow::make_nullable(0, false), // null + sparrow::make_nullable(40, true), + sparrow::make_nullable(50, true) + }; + + sparrow::primitive_array prim_array(std::move(values)); + sparrow::array arr(std::move(prim_array)); + + auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); + + if (schema_capsule == nullptr || array_capsule == nullptr) + { + Py_XDECREF(schema_capsule); + Py_XDECREF(array_capsule); + PyErr_SetString(PyExc_RuntimeError, "Failed to create PyCapsules"); + return nullptr; + } + + PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule); + Py_DECREF(schema_capsule); + Py_DECREF(array_capsule); + return result; + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} + +/** + * Import array from PyCapsules and return new PyCapsules (roundtrip). 
+ * + * Python signature: roundtrip_array_capsules(schema_capsule, array_capsule) -> tuple[capsule, capsule] + */ +static PyObject* py_roundtrip_array_capsules(PyObject* self, PyObject* args) +{ + (void)self; + + PyObject* schema_capsule_in = nullptr; + PyObject* array_capsule_in = nullptr; + + if (!PyArg_ParseTuple(args, "OO", &schema_capsule_in, &array_capsule_in)) + { + return nullptr; + } + + try + { + // Import from PyCapsules using sparrow::pycapsule + sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( + schema_capsule_in, + array_capsule_in + ); + + std::cout << "Roundtrip array size: " << arr.size() << '\n'; + + // Export back to PyCapsules + auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); + + if (schema_capsule == nullptr || array_capsule == nullptr) + { + Py_XDECREF(schema_capsule); + Py_XDECREF(array_capsule); + PyErr_SetString(PyExc_RuntimeError, "Failed to create output PyCapsules"); + return nullptr; + } + + // Return as a tuple + PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule); + Py_DECREF(schema_capsule); + Py_DECREF(array_capsule); + return result; + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} + +/** + * Verify that array imported from PyCapsules has the expected size. 
+ * + * Python signature: verify_array_size_from_capsules(schema_capsule, array_capsule, expected_size) -> bool + */ +static PyObject* py_verify_array_size_from_capsules(PyObject* self, PyObject* args) +{ + (void)self; + + PyObject* schema_capsule = nullptr; + PyObject* array_capsule = nullptr; + Py_ssize_t expected_size = 0; + + if (!PyArg_ParseTuple(args, "OOn", &schema_capsule, &array_capsule, &expected_size)) + { + return nullptr; + } + + try + { + // Import from PyCapsules using sparrow::pycapsule + sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( + schema_capsule, + array_capsule + ); + + std::cout << "Array size verified: " << arr.size() << '\n'; + + if (static_cast(arr.size()) == expected_size) + { + Py_RETURN_TRUE; + } + else + { + std::cerr << "Size mismatch: expected " << expected_size << ", got " << arr.size() << '\n'; + Py_RETURN_FALSE; + } + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} + +// Method definitions +static PyMethodDef TestPolarsHelperMethods[] = { + { + "create_test_array_capsules", + py_create_test_array_capsules, + METH_NOARGS, + "Create a test array and return (schema_capsule, array_capsule) tuple." + }, + { + "roundtrip_array_capsules", + py_roundtrip_array_capsules, + METH_VARARGS, + "Import array from capsules and export back to new capsules." + }, + { + "verify_array_size_from_capsules", + py_verify_array_size_from_capsules, + METH_VARARGS, + "Verify that array from capsules has the expected size." 
+ }, + {nullptr, nullptr, 0, nullptr} // Sentinel +}; + +// Module definition +static struct PyModuleDef test_polars_helper_module = { + PyModuleDef_HEAD_INIT, + "test_polars_helper", // Module name + "Test helper module for sparrow-pycapsule Polars integration tests.\n" + "This module tests the sparrow::pycapsule interface for Arrow data exchange.", + -1, // Module state size (-1 = no state) + TestPolarsHelperMethods +}; + +// Module initialization function +PyMODINIT_FUNC PyInit_test_polars_helper(void) +{ + return PyModule_Create(&test_polars_helper_module); +} diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index 12a5c00..27741e2 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -3,210 +3,78 @@ Integration test for sparrow-pycapsule with Polars. This test demonstrates bidirectional data exchange between sparrow (C++) and Polars (Python) -using the Arrow C Data Interface. The C++ library returns raw pointers, and Python creates -the PyCapsules to avoid Python C API calls from ctypes-loaded libraries. +using the Arrow C Data Interface via sparrow::pycapsule. The test_polars_helper module is +a native Python extension that uses sparrow::pycapsule::export_array_to_capsules() and +import_array_from_capsules() to create and consume Arrow PyCapsules directly. 
""" import sys -import ctypes import os from pathlib import Path + import pytest import polars as pl import pyarrow as pa -# Set RTLD_GLOBAL and RTLD_NOW flags before loading any libraries -# This ensures that symbols are shared globally -if hasattr(sys, 'setdlopenflags'): - sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_NOW) - -def find_library(): - """Find the sparrow-pycapsule shared library.""" - # First check environment variable - env_path = os.environ.get('SPARROW_PYCAPSULE_LIB_PATH') - if env_path: - lib_path = Path(env_path) - if lib_path.exists(): - return str(lib_path) - else: - raise FileNotFoundError( - f"SPARROW_PYCAPSULE_LIB_PATH points to non-existent file: {env_path}" - ) - - # Fallback: try to find the library in the build directory - build_dir = Path(__file__).parent.parent / "build" / "bin" - - # Check different build types and platforms - possible_paths = [ - build_dir / "Debug" / "libsparrow-pycapsule.so", - build_dir / "Release" / "libsparrow-pycapsule.so", - build_dir / "Debug" / "libsparrow-pycapsule.dylib", - build_dir / "Release" / "libsparrow-pycapsule.dylib", - build_dir / "Debug" / "sparrow-pycapsule.dll", - build_dir / "Release" / "sparrow-pycapsule.dll", +def setup_module_path(): + """Add the build directory to Python path so we can import test_polars_helper.""" + # Check for environment variable first + helper_path = os.environ.get('TEST_POLARS_HELPER_PATH') + if helper_path: + module_dir = Path(helper_path).parent + if module_dir.exists(): + sys.path.insert(0, str(module_dir)) + return + + # Try to find in build directory + test_dir = Path(__file__).parent + build_dirs = [ + test_dir.parent / "build" / "bin" / "Debug", + test_dir.parent / "build" / "bin" / "Release", + test_dir.parent / "build" / "bin", ] - for path in possible_paths: - if path.exists(): - return str(path) + for build_dir in build_dirs: + if build_dir.exists(): + sys.path.insert(0, str(build_dir)) + return - raise FileNotFoundError( - f"Could not find 
sparrow-pycapsule library. " - f"Set SPARROW_PYCAPSULE_LIB_PATH environment variable or build the project first. " - f"Searched in: {build_dir}" + raise ImportError( + "Could not find test_polars_helper module. " + "Build the project first or set TEST_POLARS_HELPER_PATH." ) -def load_test_helper_library(): - """Load the C++ test helper library.""" - # First, load sparrow-pycapsule to ensure it's available - main_lib_path = find_library() - ctypes.CDLL(main_lib_path) # Just load it, RTLD_GLOBAL is already set - - # Then load the test helper library - env_path = os.environ.get('TEST_POLARS_HELPER_LIB_PATH') - if env_path: - lib_path = Path(env_path) - if lib_path.exists(): - lib = ctypes.CDLL(str(lib_path)) - # Initialize Python in the C++ library - lib.init_python() - - # Set up function signatures for pointer-based API - lib.create_test_array_as_pointers.argtypes = [ - ctypes.POINTER(ctypes.c_void_p), - ctypes.POINTER(ctypes.c_void_p) - ] - lib.create_test_array_as_pointers.restype = ctypes.c_int - - lib.roundtrip_array_pointers.argtypes = [ - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.POINTER(ctypes.c_void_p), - ctypes.POINTER(ctypes.c_void_p) - ] - lib.roundtrip_array_pointers.restype = ctypes.c_int - - lib.verify_array_size_from_pointers.argtypes = [ - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_size_t - ] - lib.verify_array_size_from_pointers.restype = ctypes.c_int - - return lib - else: - raise FileNotFoundError( - f"TEST_POLARS_HELPER_LIB_PATH points to non-existent file: {env_path}" - ) - - raise FileNotFoundError( - "Could not find test_polars_helper library. " - "Set TEST_POLARS_HELPER_LIB_PATH environment variable or build the project first." - ) - +# Set up module path before importing +setup_module_path() -def pointer_to_arrow_capsule(schema_ptr, array_ptr): - """ - Convert C pointers to Arrow-compatible PyCapsules. - - PyArrow is very particular about how capsules are structured. - We use ctypes to call PyArrow's C API directly with our pointers. 
- """ - # Import the pointers directly using PyArrow's C Data Interface - # by creating a temporary Python object that exposes __arrow_c_array__ - - class ArrowCArrayHolder: - def __init__(self, schema_ptr, array_ptr): - self.schema_ptr = schema_ptr - self.array_ptr = array_ptr - - def __arrow_c_array__(self, requested_schema=None): # noqa: ARG001 - """Return schema and array capsules.""" - # Note: requested_schema is part of the Arrow C Data Interface protocol - from ctypes import pythonapi, py_object, c_void_p, c_char_p - - # PyCapsule_New(void *pointer, const char *name, PyCapsule_Destructor destructor) - pythonapi.PyCapsule_New.restype = py_object - pythonapi.PyCapsule_New.argtypes = [c_void_p, c_char_p, c_void_p] - - schema_capsule = pythonapi.PyCapsule_New( - self.schema_ptr, - b"arrow_schema", - None - ) - - array_capsule = pythonapi.PyCapsule_New( - self.array_ptr, - b"arrow_array", - None - ) - - return (schema_capsule, array_capsule) - - holder = ArrowCArrayHolder(schema_ptr, array_ptr) - return holder.__arrow_c_array__() - - -def capsule_to_pointer(capsule, name): - """Extract the C pointer from a PyCapsule.""" - from ctypes import pythonapi, py_object, c_void_p, c_char_p - - # void* PyCapsule_GetPointer(PyObject *capsule, const char *name) - pythonapi.PyCapsule_GetPointer.restype = c_void_p - pythonapi.PyCapsule_GetPointer.argtypes = [py_object, c_char_p] - - name_bytes = name.encode('utf-8') if name else None - ptr = pythonapi.PyCapsule_GetPointer(capsule, name_bytes) - return ptr +# Import the native Python extension module +import test_polars_helper # noqa: E402 -@pytest.fixture(scope="module") -def cpp_lib(): - """Fixture to load the C++ helper library once for all tests.""" - return load_test_helper_library() - - -def test_create_array_in_cpp(cpp_lib): - """Test creating an array in C++ and importing to Python/Polars.""" +def test_create_array_in_cpp(): + """Test creating an array in C++ (sparrow) and importing to Python/Polars.""" print("\n" + "=" 
* 70) - print("Test 1: C++ -> Python (Create array in C++, import to Polars)") + print("Test 1: C++ -> Python (Create array in sparrow, import to Polars)") print("=" * 70) - lib = cpp_lib - - # Create test array in C++ (get raw pointers) - print("\n1. Creating test array in C++ (sparrow)...") - schema_ptr = ctypes.c_void_p() - array_ptr = ctypes.c_void_p() - - result = lib.create_test_array_as_pointers( - ctypes.byref(schema_ptr), - ctypes.byref(array_ptr) - ) - - assert result == 0, "Failed to create array in C++" - assert schema_ptr.value is not None, "Received null schema pointer from C++" - assert array_ptr.value is not None, "Received null array pointer from C++" - - print(f" Array created (schema_ptr={hex(schema_ptr.value)}, array_ptr={hex(array_ptr.value)})") + print("\n1. Creating test array in C++ using sparrow::pycapsule...") + schema_capsule, array_capsule = test_polars_helper.create_test_array_capsules() - print("\n2. Converting C pointers to PyCapsules in Python...") - schema_capsule, array_capsule = pointer_to_arrow_capsule(schema_ptr.value, array_ptr.value) - print(" PyCapsules created in Python") + assert schema_capsule is not None, "Received null schema capsule from C++" + assert array_capsule is not None, "Received null array capsule from C++" + print(" PyCapsules created by sparrow::pycapsule::export_array_to_capsules()") - print("\n3. Importing to PyArrow...") + print("\n2. Importing to PyArrow...") arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) print(f" Arrow type: {arrow_array.type}") print(f" Arrow values: {arrow_array.to_pylist()}") - # Convert to Polars - print("\n4. Converting to Polars...") + print("\n3. 
Converting to Polars...") polars_series = pl.from_arrow(arrow_array) print(f" Polars series: {polars_series.to_list()}") - # Verify expected values expected = [10, 20, None, 40, 50] actual = polars_series.to_list() @@ -217,86 +85,58 @@ def test_create_array_in_cpp(cpp_lib): print("=" * 70) -def test_polars_to_cpp(cpp_lib): - """Test exporting Polars data to C++.""" +def test_polars_to_cpp(): + """Test exporting Polars data to C++ (sparrow).""" print("\n" + "=" * 70) - print("Test 2: Python -> C++ (Export Polars to C++)") + print("Test 2: Python -> C++ (Export Polars to sparrow)") print("=" * 70) - lib = cpp_lib - - # Create a Polars series print("\n1. Creating Polars series...") test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) print(f" Polars series: {test_series.to_list()}") - # Export to Arrow and then to capsules - print("\n2. Exporting to Arrow C Data Interface...") + print("\n2. Exporting to Arrow PyCapsules...") arrow_array = test_series.to_arrow() schema_capsule, array_capsule = arrow_array.__arrow_c_array__() - print(" Capsules created") - - # Extract pointers from capsules - print("\n3. Extracting raw pointers from capsules...") - schema_ptr = capsule_to_pointer(schema_capsule, "arrow_schema") - array_ptr = capsule_to_pointer(array_capsule, "arrow_array") - print(f" Pointers extracted (schema={hex(schema_ptr)}, array={hex(array_ptr)})") + print(" PyCapsules created by PyArrow") - # Verify in C++ - print("\n4. Verifying in C++ (sparrow)...") - result = lib.verify_array_size_from_pointers(schema_ptr, array_ptr, 5) + print("\n3. 
Importing and verifying in sparrow using sparrow::pycapsule...") + result = test_polars_helper.verify_array_size_from_capsules(schema_capsule, array_capsule, 5) - assert result == 0, "C++ verification failed" - print(" C++ successfully imported and verified the array!") + assert result is True, "C++ verification failed" + print(" sparrow::pycapsule::import_array_from_capsules() succeeded!") + print(" sparrow successfully imported and verified the array!") print("\n" + "=" * 70) print("Test 2 PASSED") print("=" * 70) -def test_roundtrip(cpp_lib): - """Test round-trip: Python -> C++ -> Python.""" +def test_roundtrip(): + """Test round-trip: Python -> C++ (sparrow) -> Python.""" print("\n" + "=" * 70) - print("Test 3: Round-trip (Python -> C++ -> Python)") + print("Test 3: Round-trip (Python -> sparrow -> Python)") print("=" * 70) - lib = cpp_lib - - # Create a Polars series print("\n1. Creating Polars series...") original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) print(f" Original: {original_series.to_list()}") - # Export to capsules - print("\n2. Exporting to Arrow C Data Interface...") + print("\n2. Exporting to Arrow PyCapsules...") arrow_array = original_series.to_arrow() schema_capsule_in, array_capsule_in = arrow_array.__arrow_c_array__() + print(" PyCapsules created by PyArrow") - # Extract pointers - schema_ptr_in = capsule_to_pointer(schema_capsule_in, "arrow_schema") - array_ptr_in = capsule_to_pointer(array_capsule_in, "arrow_array") - - # Round-trip through C++ - print("\n3. Round-tripping through C++...") - schema_ptr_out = ctypes.c_void_p() - array_ptr_out = ctypes.c_void_p() - - result = lib.roundtrip_array_pointers( - schema_ptr_in, - array_ptr_in, - ctypes.byref(schema_ptr_out), - ctypes.byref(array_ptr_out) + print("\n3. 
Round-tripping through sparrow using sparrow::pycapsule...") + schema_capsule_out, array_capsule_out = test_polars_helper.roundtrip_array_capsules( + schema_capsule_in, + array_capsule_in ) - assert result == 0, "Round-trip failed in C++" - assert schema_ptr_out.value is not None, "Received null schema output pointer from C++" - assert array_ptr_out.value is not None, "Received null array output pointer from C++" - - print(" C++ processed the array") - - print("\n4. Converting output to capsules...") - schema_capsule_out, array_capsule_out = pointer_to_arrow_capsule(schema_ptr_out.value, array_ptr_out.value) + assert schema_capsule_out is not None, "Received null schema capsule from C++" + assert array_capsule_out is not None, "Received null array capsule from C++" + print(" sparrow::pycapsule import/export succeeded!") - print("\n5. Importing back to Python...") + print("\n4. Importing back to Python...") arrow_array_out = pa.Array._import_from_c_capsule(schema_capsule_out, array_capsule_out) result_series = pl.from_arrow(arrow_array_out) print(f" Result: {result_series.to_list()}") @@ -312,6 +152,4 @@ def test_roundtrip(cpp_lib): if __name__ == "__main__": - """Run tests with pytest when executed directly.""" - # Run pytest on this file sys.exit(pytest.main([__file__, "-v", "-s"])) From 58d532d897605ba71888325408d35df53e7c86a5 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Tue, 25 Nov 2025 20:28:11 +0100 Subject: [PATCH 12/18] wip --- test/test_polars_integration.py | 224 ++++++++++++++++++-------------- 1 file changed, 127 insertions(+), 97 deletions(-) diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index 27741e2..c8167f4 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -26,7 +26,7 @@ def setup_module_path(): if module_dir.exists(): sys.path.insert(0, str(module_dir)) return - + # Try to find in build directory test_dir = Path(__file__).parent build_dirs = [ @@ -34,12 +34,12 @@ def 
setup_module_path(): test_dir.parent / "build" / "bin" / "Release", test_dir.parent / "build" / "bin", ] - + for build_dir in build_dirs: if build_dir.exists(): sys.path.insert(0, str(build_dir)) return - + raise ImportError( "Could not find test_polars_helper module. " "Build the project first or set TEST_POLARS_HELPER_PATH." @@ -53,103 +53,133 @@ def setup_module_path(): import test_polars_helper # noqa: E402 -def test_create_array_in_cpp(): +# ============================================================================= +# Test 1: C++ -> Python (Create array in sparrow, import to Polars) +# ============================================================================= + + +class TestCppToPython: """Test creating an array in C++ (sparrow) and importing to Python/Polars.""" - print("\n" + "=" * 70) - print("Test 1: C++ -> Python (Create array in sparrow, import to Polars)") - print("=" * 70) - - print("\n1. Creating test array in C++ using sparrow::pycapsule...") - schema_capsule, array_capsule = test_polars_helper.create_test_array_capsules() - - assert schema_capsule is not None, "Received null schema capsule from C++" - assert array_capsule is not None, "Received null array capsule from C++" - print(" PyCapsules created by sparrow::pycapsule::export_array_to_capsules()") - - print("\n2. Importing to PyArrow...") - arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) - print(f" Arrow type: {arrow_array.type}") - print(f" Arrow values: {arrow_array.to_pylist()}") - - print("\n3. Converting to Polars...") - polars_series = pl.from_arrow(arrow_array) - print(f" Polars series: {polars_series.to_list()}") - - expected = [10, 20, None, 40, 50] - actual = polars_series.to_list() - - assert expected == actual, f"Data mismatch! 
Expected: {expected}, Actual: {actual}" - print(" Data matches expected values!") - print("\n" + "=" * 70) - print("Test 1 PASSED") - print("=" * 70) - - -def test_polars_to_cpp(): + + @pytest.fixture(autouse=True) + def setup(self): + """Create test array capsules from C++.""" + self.schema_capsule, self.array_capsule = ( + test_polars_helper.create_test_array_capsules() + ) + + def test_step1_create_capsules_in_cpp(self): + """Step 1: Create PyCapsules in C++ using sparrow::pycapsule.""" + assert self.schema_capsule is not None, "Received null schema capsule from C++" + assert self.array_capsule is not None, "Received null array capsule from C++" + + def test_step2_import_to_pyarrow(self): + """Step 2: Import PyCapsules to PyArrow.""" + arrow_array = pa.Array._import_from_c_capsule( + self.schema_capsule, self.array_capsule + ) + assert arrow_array.type == pa.int32(), f"Expected int32, got {arrow_array.type}" + assert arrow_array.to_pylist() == [10, 20, None, 40, 50] + + def test_step3_convert_to_polars(self): + """Step 3: Convert PyArrow array to Polars series.""" + arrow_array = pa.Array._import_from_c_capsule( + self.schema_capsule, self.array_capsule + ) + polars_series = pl.from_arrow(arrow_array) + + expected = [10, 20, None, 40, 50] + actual = polars_series.to_list() + assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" + + +# ============================================================================= +# Test 2: Python -> C++ (Export Polars to sparrow) +# ============================================================================= + + +class TestPythonToCpp: """Test exporting Polars data to C++ (sparrow).""" - print("\n" + "=" * 70) - print("Test 2: Python -> C++ (Export Polars to sparrow)") - print("=" * 70) - - print("\n1. Creating Polars series...") - test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) - print(f" Polars series: {test_series.to_list()}") - - print("\n2. 
Exporting to Arrow PyCapsules...") - arrow_array = test_series.to_arrow() - schema_capsule, array_capsule = arrow_array.__arrow_c_array__() - print(" PyCapsules created by PyArrow") - - print("\n3. Importing and verifying in sparrow using sparrow::pycapsule...") - result = test_polars_helper.verify_array_size_from_capsules(schema_capsule, array_capsule, 5) - - assert result is True, "C++ verification failed" - print(" sparrow::pycapsule::import_array_from_capsules() succeeded!") - print(" sparrow successfully imported and verified the array!") - print("\n" + "=" * 70) - print("Test 2 PASSED") - print("=" * 70) - - -def test_roundtrip(): + + @pytest.fixture(autouse=True) + def setup(self): + """Create Polars series and export to capsules.""" + self.test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) + self.arrow_array = self.test_series.to_arrow() + self.schema_capsule, self.array_capsule = self.arrow_array.__arrow_c_array__() + + def test_step1_create_polars_series(self): + """Step 1: Create Polars series.""" + assert self.test_series.to_list() == [100, 200, None, 400, 500] + assert self.test_series.dtype == pl.Int32 + + def test_step2_export_to_capsules(self): + """Step 2: Export Polars series to Arrow PyCapsules.""" + assert self.schema_capsule is not None, "Schema capsule is None" + assert self.array_capsule is not None, "Array capsule is None" + + def test_step3_import_in_sparrow(self): + """Step 3: Import and verify in sparrow using sparrow::pycapsule.""" + result = test_polars_helper.verify_array_size_from_capsules( + self.schema_capsule, self.array_capsule, 5 + ) + assert result is True, "C++ verification failed" + + +# ============================================================================= +# Test 3: Round-trip (Python -> sparrow -> Python) +# ============================================================================= + + +class TestRoundtrip: """Test round-trip: Python -> C++ (sparrow) -> Python.""" - print("\n" + "=" * 70) - 
print("Test 3: Round-trip (Python -> sparrow -> Python)") - print("=" * 70) - - print("\n1. Creating Polars series...") - original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) - print(f" Original: {original_series.to_list()}") - - print("\n2. Exporting to Arrow PyCapsules...") - arrow_array = original_series.to_arrow() - schema_capsule_in, array_capsule_in = arrow_array.__arrow_c_array__() - print(" PyCapsules created by PyArrow") - - print("\n3. Round-tripping through sparrow using sparrow::pycapsule...") - schema_capsule_out, array_capsule_out = test_polars_helper.roundtrip_array_capsules( - schema_capsule_in, - array_capsule_in - ) - - assert schema_capsule_out is not None, "Received null schema capsule from C++" - assert array_capsule_out is not None, "Received null array capsule from C++" - print(" sparrow::pycapsule import/export succeeded!") - - print("\n4. Importing back to Python...") - arrow_array_out = pa.Array._import_from_c_capsule(schema_capsule_out, array_capsule_out) - result_series = pl.from_arrow(arrow_array_out) - print(f" Result: {result_series.to_list()}") - - original_data = original_series.to_list() - result_data = result_series.to_list() - assert original_data == result_data, f"Data mismatch! 
Original: {original_data}, Result: {result_data}" - - print(" Round-trip successful - data matches!") - print("\n" + "=" * 70) - print("Test 3 PASSED") - print("=" * 70) + + @pytest.fixture(autouse=True) + def setup(self): + """Create original series and export to capsules.""" + self.original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) + self.arrow_array = self.original_series.to_arrow() + self.schema_capsule_in, self.array_capsule_in = ( + self.arrow_array.__arrow_c_array__() + ) + + def test_step1_create_original_series(self): + """Step 1: Create original Polars series.""" + assert self.original_series.to_list() == [1, 2, None, 4, 5] + + def test_step2_export_to_capsules(self): + """Step 2: Export to Arrow PyCapsules.""" + assert self.schema_capsule_in is not None + assert self.array_capsule_in is not None + + def test_step3_roundtrip_through_sparrow(self): + """Step 3: Round-trip through sparrow using sparrow::pycapsule.""" + schema_capsule_out, array_capsule_out = ( + test_polars_helper.roundtrip_array_capsules( + self.schema_capsule_in, self.array_capsule_in + ) + ) + assert schema_capsule_out is not None, "Received null schema capsule from C++" + assert array_capsule_out is not None, "Received null array capsule from C++" + + def test_step4_import_back_to_python(self): + """Step 4: Import back to Python and verify data matches.""" + schema_capsule_out, array_capsule_out = ( + test_polars_helper.roundtrip_array_capsules( + self.schema_capsule_in, self.array_capsule_in + ) + ) + arrow_array_out = pa.Array._import_from_c_capsule( + schema_capsule_out, array_capsule_out + ) + result_series = pl.from_arrow(arrow_array_out) + + original_data = self.original_series.to_list() + result_data = result_series.to_list() + assert ( + original_data == result_data + ), f"Data mismatch! 
Original: {original_data}, Result: {result_data}" if __name__ == "__main__": - sys.exit(pytest.main([__file__, "-v", "-s"])) + sys.exit(pytest.main([__file__, "-v"])) From 0a0d3453ac32cc9ed693af3ab4cc1f62b2d7cf15 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Wed, 26 Nov 2025 11:13:54 +0100 Subject: [PATCH 13/18] wip --- .../sparrow-pycapsule/SparrowPythonClass.hpp | 97 +++++++ src/SparrowPythonClass.cpp | 191 ++++++++++++++ test/test_polars_helper_module.cpp | 128 +++++++++- test/test_polars_integration.py | 241 ++++++++++-------- 4 files changed, 549 insertions(+), 108 deletions(-) create mode 100644 include/sparrow-pycapsule/SparrowPythonClass.hpp create mode 100644 src/SparrowPythonClass.cpp diff --git a/include/sparrow-pycapsule/SparrowPythonClass.hpp b/include/sparrow-pycapsule/SparrowPythonClass.hpp new file mode 100644 index 0000000..3418c77 --- /dev/null +++ b/include/sparrow-pycapsule/SparrowPythonClass.hpp @@ -0,0 +1,97 @@ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#include + +#include "sparrow-pycapsule/config/config.hpp" +#include "sparrow-pycapsule/pycapsule.hpp" + +namespace sparrow::pycapsule +{ + /** + * @brief Python object structure for SparrowArray. + * + * This structure holds a pointer to a sparrow::array. The pointer is used + * to avoid issues with C++ objects in C-style Python object structures. + */ + struct SparrowArrayObject + { + PyObject_HEAD sparrow::array* arr; + }; + + /** + * @brief Deallocator for SparrowArray Python objects. + */ + SPARROW_PYCAPSULE_API void SparrowArray_dealloc(SparrowArrayObject* self); + + /** + * @brief Implementation of __arrow_c_array__ method. + * + * This method exports the wrapped sparrow array as Arrow PyCapsules, + * implementing the Arrow PyCapsule Interface (ArrowArrayExportable protocol). + * + * @param self The SparrowArray object. + * @param args Positional arguments (unused). + * @param kwargs Keyword arguments (optional requested_schema). 
+ * @return A tuple of (schema_capsule, array_capsule). + */ + SPARROW_PYCAPSULE_API PyObject* + SparrowArray_arrow_c_array(SparrowArrayObject* self, PyObject* args, PyObject* kwargs); + + /** + * @brief Get the size of the wrapped array. + * + * @param self The SparrowArray object. + * @param args Positional arguments (unused). + * @return The size of the array as a Python integer. + */ + SPARROW_PYCAPSULE_API PyObject* SparrowArray_size(SparrowArrayObject* self, PyObject* args); + + /** + * @brief Get the Python type object for SparrowArray. + * + * This function returns a pointer to the SparrowArrayType. The type is + * initialized on first call if necessary. + * + * @return Pointer to the SparrowArrayType, or nullptr on error. + */ + SPARROW_PYCAPSULE_API PyTypeObject* get_sparrow_array_type(); + + /** + * @brief Create a new SparrowArray Python object from a sparrow::array. + * + * This function creates a new Python object that wraps the given sparrow array. + * The array is moved into the Python object, so the caller should not use it + * after this call. + * + * @param arr The sparrow array to wrap (will be moved). + * @return A new reference to a SparrowArray Python object, or nullptr on error. + */ + SPARROW_PYCAPSULE_API PyObject* create_sparrow_array_object(sparrow::array&& arr); + + /** + * @brief Create a new SparrowArray Python object from PyCapsules. + * + * This function creates a new Python object by importing from existing + * Arrow PyCapsules. + * + * @param schema_capsule The schema PyCapsule. + * @param array_capsule The array PyCapsule. + * @return A new reference to a SparrowArray Python object, or nullptr on error. + */ + SPARROW_PYCAPSULE_API PyObject* + create_sparrow_array_object_from_capsules(PyObject* schema_capsule, PyObject* array_capsule); + + /** + * @brief Register the SparrowArray type with a Python module. + * + * This function adds the SparrowArray type to the given module. 
+ * + * @param module The Python module to add the type to. + * @return 0 on success, -1 on error. + */ + SPARROW_PYCAPSULE_API int register_sparrow_array_type(PyObject* module); + +} // namespace sparrow::pycapsule diff --git a/src/SparrowPythonClass.cpp b/src/SparrowPythonClass.cpp new file mode 100644 index 0000000..a796414 --- /dev/null +++ b/src/SparrowPythonClass.cpp @@ -0,0 +1,191 @@ +#include "sparrow-pycapsule/SparrowPythonClass.hpp" + +#include +#include + +namespace sparrow::pycapsule +{ + void SparrowArray_dealloc(SparrowArrayObject* self) + { + delete self->arr; + self->arr = nullptr; + Py_TYPE(self)->tp_free(reinterpret_cast(self)); + } + + PyObject* SparrowArray_arrow_c_array(SparrowArrayObject* self, PyObject* args, PyObject* kwargs) + { + static const char* kwlist[] = {"requested_schema", nullptr}; + PyObject* requested_schema = nullptr; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", const_cast(kwlist), &requested_schema)) + { + return nullptr; + } + + // requested_schema is typically ignored for simple cases + // In a full implementation, you might use it to cast to a different type + (void) requested_schema; + + if (self->arr == nullptr) + { + PyErr_SetString(PyExc_ValueError, "SparrowArray contains no data"); + return nullptr; + } + + try + { + auto [schema_capsule, array_capsule] = export_array_to_capsules(*self->arr); + + if (schema_capsule == nullptr || array_capsule == nullptr) + { + Py_XDECREF(schema_capsule); + Py_XDECREF(array_capsule); + PyErr_SetString(PyExc_RuntimeError, "Failed to create Arrow PyCapsules"); + return nullptr; + } + + PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule); + Py_DECREF(schema_capsule); + Py_DECREF(array_capsule); + return result; + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } + } + + PyObject* SparrowArray_size(SparrowArrayObject* self, [[maybe_unused]] PyObject* args) + { + if (self->arr == nullptr) + { + 
PyErr_SetString(PyExc_ValueError, "SparrowArray contains no data"); + return nullptr; + } + + return PyLong_FromSize_t(self->arr->size()); + } + + static PyMethodDef SparrowArray_methods[] = { + {"__arrow_c_array__", + reinterpret_cast(SparrowArray_arrow_c_array), + METH_VARARGS | METH_KEYWORDS, + "Export the array via the Arrow PyCapsule interface.\n\n" + "Parameters\n" + "----------\n" + "requested_schema : object, optional\n" + " Requested schema for the output (typically ignored).\n\n" + "Returns\n" + "-------\n" + "tuple[object, object]\n" + " A tuple of (schema_capsule, array_capsule)."}, + {"size", + reinterpret_cast(SparrowArray_size), + METH_NOARGS, + "Get the number of elements in the array.\n\n" + "Returns\n" + "-------\n" + "int\n" + " The size of the array."}, + {nullptr, nullptr, 0, nullptr} // Sentinel + }; + + // The type object - defined as a static variable + static PyTypeObject SparrowArrayType = { + .ob_base = PyVarObject_HEAD_INIT(nullptr, 0).tp_name = "sparrow.SparrowArray", + .tp_basicsize = sizeof(SparrowArrayObject), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(SparrowArray_dealloc), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = PyDoc_STR( + "SparrowArray - Arrow array wrapper implementing __arrow_c_array__.\n\n" + "This class wraps a sparrow array and implements the Arrow PyCapsule\n" + "Interface (ArrowArrayExportable protocol), allowing it to be passed\n" + "directly to libraries like Polars via pl.from_arrow()." 
+ ), + .tp_methods = SparrowArray_methods, + }; + + static bool type_initialized = false; + + PyTypeObject* get_sparrow_array_type() + { + if (!type_initialized) + { + if (PyType_Ready(&SparrowArrayType) < 0) + { + return nullptr; + } + type_initialized = true; + } + return &SparrowArrayType; + } + + PyObject* create_sparrow_array_object(sparrow::array&& arr) + { + PyTypeObject* type = get_sparrow_array_type(); + if (type == nullptr) + { + return nullptr; + } + + SparrowArrayObject* obj = PyObject_New(SparrowArrayObject, type); + if (obj == nullptr) + { + return nullptr; + } + + try + { + obj->arr = new sparrow::array(std::move(arr)); + } + catch (const std::bad_alloc&) + { + Py_DECREF(obj); + PyErr_NoMemory(); + return nullptr; + } + catch (const std::exception& e) + { + Py_DECREF(obj); + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } + + return reinterpret_cast(obj); + } + + PyObject* create_sparrow_array_object_from_capsules(PyObject* schema_capsule, PyObject* array_capsule) + { + try + { + sparrow::array arr = import_array_from_capsules(schema_capsule, array_capsule); + return create_sparrow_array_object(std::move(arr)); + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } + } + + int register_sparrow_array_type(PyObject* module) + { + PyTypeObject* type = get_sparrow_array_type(); + if (type == nullptr) + { + return -1; + } + + Py_INCREF(type); + if (PyModule_AddObject(module, "SparrowArray", reinterpret_cast(type)) < 0) + { + Py_DECREF(type); + return -1; + } + + return 0; + } + +} // namespace sparrow::pycapsule diff --git a/test/test_polars_helper_module.cpp b/test/test_polars_helper_module.cpp index 5ab0e55..4e8155c 100644 --- a/test/test_polars_helper_module.cpp +++ b/test/test_polars_helper_module.cpp @@ -20,6 +20,41 @@ #include #include +#include + +/** + * Create a test array and return a SparrowArray object. 
+ * + * Python signature: create_test_array() -> SparrowArray + */ +static PyObject* py_create_test_array(PyObject* self, PyObject* args) +{ + (void)self; + (void)args; + + try + { + // Create a test array with nullable integers + std::vector> values = { + sparrow::make_nullable(10, true), + sparrow::make_nullable(20, true), + sparrow::make_nullable(0, false), // null + sparrow::make_nullable(40, true), + sparrow::make_nullable(50, true) + }; + + sparrow::primitive_array prim_array(std::move(values)); + sparrow::array arr(std::move(prim_array)); + + // Return a SparrowArray object that implements __arrow_c_array__ + return sparrow::pycapsule::create_sparrow_array_object(std::move(arr)); + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} /** * Create a test array and return PyCapsules. @@ -67,6 +102,72 @@ static PyObject* py_create_test_array_capsules(PyObject* self, PyObject* args) } } +/** + * Import from an object implementing __arrow_c_array__ and return a SparrowArray. 
+ * + * Python signature: roundtrip_array(arrow_array) -> SparrowArray + */ +static PyObject* py_roundtrip_array(PyObject* self, PyObject* args) +{ + (void)self; + + PyObject* arrow_array = nullptr; + + if (!PyArg_ParseTuple(args, "O", &arrow_array)) + { + return nullptr; + } + + // Get __arrow_c_array__ method from the input object + PyObject* arrow_c_array_method = PyObject_GetAttrString(arrow_array, "__arrow_c_array__"); + if (arrow_c_array_method == nullptr) + { + PyErr_SetString(PyExc_TypeError, "Object does not implement __arrow_c_array__"); + return nullptr; + } + + // Call __arrow_c_array__() to get (schema_capsule, array_capsule) + PyObject* capsules = PyObject_CallObject(arrow_c_array_method, nullptr); + Py_DECREF(arrow_c_array_method); + + if (capsules == nullptr) + { + return nullptr; + } + + if (!PyTuple_Check(capsules) || PyTuple_Size(capsules) != 2) + { + Py_DECREF(capsules); + PyErr_SetString(PyExc_TypeError, "__arrow_c_array__ must return a tuple of 2 capsules"); + return nullptr; + } + + PyObject* schema_capsule = PyTuple_GetItem(capsules, 0); + PyObject* array_capsule = PyTuple_GetItem(capsules, 1); + + try + { + // Import from PyCapsules using sparrow::pycapsule + sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( + schema_capsule, + array_capsule + ); + + Py_DECREF(capsules); + + std::cout << "Roundtrip array size: " << arr.size() << '\n'; + + // Return a SparrowArray object that implements __arrow_c_array__ + return sparrow::pycapsule::create_sparrow_array_object(std::move(arr)); + } + catch (const std::exception& e) + { + Py_DECREF(capsules); + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} + /** * Import array from PyCapsules and return new PyCapsules (roundtrip). 
* @@ -165,12 +266,24 @@ static PyObject* py_verify_array_size_from_capsules(PyObject* self, PyObject* ar // Method definitions static PyMethodDef TestPolarsHelperMethods[] = { + { + "create_test_array", + py_create_test_array, + METH_NOARGS, + "Create a test array and return a SparrowArray object implementing __arrow_c_array__." + }, { "create_test_array_capsules", py_create_test_array_capsules, METH_NOARGS, "Create a test array and return (schema_capsule, array_capsule) tuple." }, + { + "roundtrip_array", + py_roundtrip_array, + METH_VARARGS, + "Import from an object implementing __arrow_c_array__ and return a SparrowArray." + }, { "roundtrip_array_capsules", py_roundtrip_array_capsules, @@ -199,5 +312,18 @@ static struct PyModuleDef test_polars_helper_module = { // Module initialization function PyMODINIT_FUNC PyInit_test_polars_helper(void) { - return PyModule_Create(&test_polars_helper_module); + PyObject* module = PyModule_Create(&test_polars_helper_module); + if (module == nullptr) + { + return nullptr; + } + + // Register the SparrowArray type with this module + if (sparrow::pycapsule::register_sparrow_array_type(module) < 0) + { + Py_DECREF(module); + return nullptr; + } + + return module; } diff --git a/test/test_polars_integration.py b/test/test_polars_integration.py index c8167f4..574aeca 100644 --- a/test/test_polars_integration.py +++ b/test/test_polars_integration.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 """ -Integration test for sparrow-pycapsule with Polars. +Integration test for sparrow-pycapsule with Polars and PyArrow. -This test demonstrates bidirectional data exchange between sparrow (C++) and Polars (Python) -using the Arrow C Data Interface via sparrow::pycapsule. The test_polars_helper module is -a native Python extension that uses sparrow::pycapsule::export_array_to_capsules() and -import_array_from_capsules() to create and consume Arrow PyCapsules directly. +This test demonstrates: +1. 
Sparrow → Polars: Create array in C++ (sparrow), import to Polars +2. PyArrow → Sparrow: Create array in PyArrow, import to sparrow + +The C++ SparrowArray class implements the Arrow PyCapsule Interface (__arrow_c_array__), +allowing direct integration with Polars without going through PyArrow. """ import sys @@ -15,6 +17,8 @@ import pytest import polars as pl import pyarrow as pa +from polars._plr import PySeries +from polars._utils.wrap import wrap_s def setup_module_path(): @@ -54,131 +58,154 @@ def setup_module_path(): # ============================================================================= -# Test 1: C++ -> Python (Create array in sparrow, import to Polars) +# Helper function to convert ArrowArrayExportable to Polars Series # ============================================================================= -class TestCppToPython: - """Test creating an array in C++ (sparrow) and importing to Python/Polars.""" - - @pytest.fixture(autouse=True) - def setup(self): - """Create test array capsules from C++.""" - self.schema_capsule, self.array_capsule = ( - test_polars_helper.create_test_array_capsules() - ) - - def test_step1_create_capsules_in_cpp(self): - """Step 1: Create PyCapsules in C++ using sparrow::pycapsule.""" - assert self.schema_capsule is not None, "Received null schema capsule from C++" - assert self.array_capsule is not None, "Received null array capsule from C++" - - def test_step2_import_to_pyarrow(self): - """Step 2: Import PyCapsules to PyArrow.""" - arrow_array = pa.Array._import_from_c_capsule( - self.schema_capsule, self.array_capsule - ) - assert arrow_array.type == pa.int32(), f"Expected int32, got {arrow_array.type}" - assert arrow_array.to_pylist() == [10, 20, None, 40, 50] - - def test_step3_convert_to_polars(self): - """Step 3: Convert PyArrow array to Polars series.""" - arrow_array = pa.Array._import_from_c_capsule( - self.schema_capsule, self.array_capsule - ) - polars_series = pl.from_arrow(arrow_array) - - expected = [10, 20, 
None, 40, 50] - actual = polars_series.to_list() - assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" +def arrow_array_to_series(arrow_array, name: str = "") -> pl.Series: + """ + Convert an object implementing __arrow_c_array__ to a Polars Series. + + This function uses Polars' internal PySeries.from_arrow_c_array to create + a Series directly from an Arrow array. + + Parameters + ---------- + arrow_array : ArrowArrayExportable + An object that implements __arrow_c_array__ method. + name : str, optional + Name for the resulting Series. Default is empty string. + + Returns + ------- + pl.Series + A Polars Series containing the array data. + """ + ps = PySeries.from_arrow_c_array(arrow_array) + series = wrap_s(ps) + if name: + series = series.alias(name) + return series # ============================================================================= -# Test 2: Python -> C++ (Export Polars to sparrow) +# Test 1: Sparrow → Polars (Create array in C++, import to Polars) # ============================================================================= -class TestPythonToCpp: - """Test exporting Polars data to C++ (sparrow).""" +class TestSparrowToPolars: + """Test creating an array in C++ (sparrow) and importing to Polars.""" - @pytest.fixture(autouse=True) - def setup(self): - """Create Polars series and export to capsules.""" - self.test_series = pl.Series([100, 200, None, 400, 500], dtype=pl.Int32) - self.arrow_array = self.test_series.to_arrow() - self.schema_capsule, self.array_capsule = self.arrow_array.__arrow_c_array__() + def test_create_sparrow_array(self): + """Create a SparrowArray in C++ that implements __arrow_c_array__.""" + sparrow_array = test_polars_helper.create_test_array() + + assert sparrow_array is not None, "Received null SparrowArray from C++" + assert hasattr(sparrow_array, '__arrow_c_array__'), "SparrowArray missing __arrow_c_array__ method" + assert sparrow_array.size() == 5, f"Expected size 5, got 
{sparrow_array.size()}" - def test_step1_create_polars_series(self): - """Step 1: Create Polars series.""" - assert self.test_series.to_list() == [100, 200, None, 400, 500] - assert self.test_series.dtype == pl.Int32 + def test_sparrow_to_polars_series(self): + """Convert SparrowArray to Polars Series using the Arrow PyCapsule Interface.""" + sparrow_array = test_polars_helper.create_test_array() + polars_series = arrow_array_to_series(sparrow_array) - def test_step2_export_to_capsules(self): - """Step 2: Export Polars series to Arrow PyCapsules.""" - assert self.schema_capsule is not None, "Schema capsule is None" - assert self.array_capsule is not None, "Array capsule is None" + assert polars_series.dtype == pl.Int32, f"Expected Int32, got {polars_series.dtype}" + expected = [10, 20, None, 40, 50] + actual = polars_series.to_list() + assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" - def test_step3_import_in_sparrow(self): - """Step 3: Import and verify in sparrow using sparrow::pycapsule.""" - result = test_polars_helper.verify_array_size_from_capsules( - self.schema_capsule, self.array_capsule, 5 - ) - assert result is True, "C++ verification failed" + def test_sparrow_to_polars_preserves_nulls(self): + """Verify that null values from sparrow are preserved in Polars.""" + sparrow_array = test_polars_helper.create_test_array() + polars_series = arrow_array_to_series(sparrow_array) + + # The test array has a null at index 2 + values = polars_series.to_list() + assert values[2] is None, "Null value not preserved at index 2" # ============================================================================= -# Test 3: Round-trip (Python -> sparrow -> Python) +# Test 2: PyArrow → Sparrow (Create array in PyArrow, import to sparrow) # ============================================================================= -class TestRoundtrip: - """Test round-trip: Python -> C++ (sparrow) -> Python.""" +class TestPyArrowToSparrow: + """Test 
creating an array in PyArrow and importing to sparrow.""" - @pytest.fixture(autouse=True) - def setup(self): - """Create original series and export to capsules.""" - self.original_series = pl.Series([1, 2, None, 4, 5], dtype=pl.Int32) - self.arrow_array = self.original_series.to_arrow() - self.schema_capsule_in, self.array_capsule_in = ( - self.arrow_array.__arrow_c_array__() - ) - - def test_step1_create_original_series(self): - """Step 1: Create original Polars series.""" - assert self.original_series.to_list() == [1, 2, None, 4, 5] - - def test_step2_export_to_capsules(self): - """Step 2: Export to Arrow PyCapsules.""" - assert self.schema_capsule_in is not None - assert self.array_capsule_in is not None - - def test_step3_roundtrip_through_sparrow(self): - """Step 3: Round-trip through sparrow using sparrow::pycapsule.""" - schema_capsule_out, array_capsule_out = ( - test_polars_helper.roundtrip_array_capsules( - self.schema_capsule_in, self.array_capsule_in - ) - ) - assert schema_capsule_out is not None, "Received null schema capsule from C++" - assert array_capsule_out is not None, "Received null array capsule from C++" - - def test_step4_import_back_to_python(self): - """Step 4: Import back to Python and verify data matches.""" - schema_capsule_out, array_capsule_out = ( - test_polars_helper.roundtrip_array_capsules( - self.schema_capsule_in, self.array_capsule_in - ) + def test_pyarrow_to_sparrow_via_capsules(self): + """Import PyArrow array to sparrow using PyCapsules.""" + # Create a PyArrow array + pa_array = pa.array([100, 200, None, 400, 500], type=pa.int32()) + + # Export to PyCapsules using Arrow PyCapsule Interface + schema_capsule, array_capsule = pa_array.__arrow_c_array__() + + # Verify sparrow can import and read the data + result = test_polars_helper.verify_array_size_from_capsules( + schema_capsule, array_capsule, 5 ) - arrow_array_out = pa.Array._import_from_c_capsule( - schema_capsule_out, array_capsule_out + assert result is True, "Sparrow 
failed to import PyArrow array" + + def test_pyarrow_roundtrip_through_sparrow(self): + """Round-trip: PyArrow → sparrow → Polars.""" + # Create a PyArrow array + pa_array = pa.array([1, 2, None, 4, 5], type=pa.int32()) + + # Export to PyCapsules + schema_capsule, array_capsule = pa_array.__arrow_c_array__() + + # Round-trip through sparrow (import then export) + schema_out, array_out = test_polars_helper.roundtrip_array_capsules( + schema_capsule, array_capsule ) - result_series = pl.from_arrow(arrow_array_out) + + # Import the result into Polars using a wrapper + class CapsuleWrapper: + def __init__(self, schema, array): + self._schema = schema + self._array = array + def __arrow_c_array__(self, requested_schema=None): + return self._schema, self._array + + wrapper = CapsuleWrapper(schema_out, array_out) + result_series = arrow_array_to_series(wrapper) + + # Verify data matches + expected = [1, 2, None, 4, 5] + actual = result_series.to_list() + assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" - original_data = self.original_series.to_list() - result_data = result_series.to_list() - assert ( - original_data == result_data - ), f"Data mismatch! 
Original: {original_data}, Result: {result_data}" + def test_pyarrow_nulls_preserved_in_sparrow(self): + """Verify that null values from PyArrow are preserved through sparrow.""" + # Create a PyArrow array with nulls + pa_array = pa.array([None, 1, None, 3, None], type=pa.int32()) + + # Export to PyCapsules + schema_capsule, array_capsule = pa_array.__arrow_c_array__() + + # Round-trip through sparrow + schema_out, array_out = test_polars_helper.roundtrip_array_capsules( + schema_capsule, array_capsule + ) + + # Import into Polars + class CapsuleWrapper: + def __init__(self, schema, array): + self._schema = schema + self._array = array + def __arrow_c_array__(self, requested_schema=None): + return self._schema, self._array + + wrapper = CapsuleWrapper(schema_out, array_out) + result_series = arrow_array_to_series(wrapper) + + # Check null positions + values = result_series.to_list() + assert values[0] is None, "Null not preserved at index 0" + assert values[1] == 1, "Value changed at index 1" + assert values[2] is None, "Null not preserved at index 2" + assert values[3] == 3, "Value changed at index 3" + assert values[4] is None, "Null not preserved at index 4" if __name__ == "__main__": From d75ff65a388afc7e7fe75574d34c6d64fb08ebfd Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Wed, 26 Nov 2025 13:34:19 +0100 Subject: [PATCH 14/18] wip --- .github/workflows/linux.yml | 4 +- .github/workflows/osx.yml | 4 +- .github/workflows/windows.yml | 4 +- CMakeLists.txt | 2 + ...ass.hpp => sparrow_array_python_class.hpp} | 0 ...ass.cpp => sparrow_array_python_class.cpp} | 86 ++++- test/CMakeLists.txt | 64 ++-- test/sparrow_helpers.py | 72 ++++ test/test_library_load.py | 20 +- test/test_polars_helper.cpp | 137 -------- test/test_polars_helper.hpp | 67 ---- test/test_polars_helper_module.cpp | 329 ------------------ test/test_sparrow_helper_module.cpp | 95 +++++ ...gration.py => test_sparrow_integration.py} | 185 +++++----- 14 files changed, 374 insertions(+), 695 
deletions(-) rename include/sparrow-pycapsule/{SparrowPythonClass.hpp => sparrow_array_python_class.hpp} (100%) rename src/{SparrowPythonClass.cpp => sparrow_array_python_class.cpp} (66%) create mode 100644 test/sparrow_helpers.py delete mode 100644 test/test_polars_helper.cpp delete mode 100644 test/test_polars_helper.hpp delete mode 100644 test/test_polars_helper_module.cpp create mode 100644 test/test_sparrow_helper_module.cpp rename test/{test_polars_integration.py => test_sparrow_integration.py} (51%) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 6db0537..cc997aa 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -50,10 +50,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report - - name: Run Polars integration tests + - name: Run sparrow integration tests if: matrix.build_shared == 'ON' working-directory: build - run: cmake --build . --target run_polars_tests_direct + run: cmake --build . --target run_sparrow_tests_direct - name: Install working-directory: build diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 7a3dbb9..e5951ac 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -55,10 +55,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report - - name: Run Polars integration tests + - name: Run Sparrow integration tests if: matrix.build_shared == 'ON' working-directory: build - run: cmake --build . --target run_polars_tests_direct + run: cmake --build . --target run_sparrow_tests_direct - name: Install working-directory: build diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index fef6f0a..24a1885 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -55,10 +55,10 @@ jobs: run: | cmake --build . 
--config ${{ matrix.build_type }} --target run_tests_with_junit_report - - name: Run Polars integration tests + - name: Run Sparrow integration tests if: matrix.build_shared == 'ON' working-directory: build - run: cmake --build . --config ${{ matrix.build_type }} --target run_polars_tests_direct + run: cmake --build . --config ${{ matrix.build_type }} --target run_sparrow_tests_direct - name: Install working-directory: build diff --git a/CMakeLists.txt b/CMakeLists.txt index 6946c66..32bb7dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,10 +151,12 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE "${BINARY_BUILD_DIR}") set(SPARROW_PYCAPSULE_HEADERS ${SPARROW_PYCAPSULE_INCLUDE_DIR}/sparrow-pycapsule/config/sparrow_pycapsule_version.hpp ${SPARROW_PYCAPSULE_INCLUDE_DIR}/sparrow-pycapsule/pycapsule.hpp + ${SPARROW_PYCAPSULE_INCLUDE_DIR}/sparrow-pycapsule/sparrow_array_python_class.hpp ) set(SPARROW_PYCAPSULE_SOURCES src/pycapsule.cpp + src/sparrow_array_python_class.cpp ) option(SPARROW_PYCAPSULE_BUILD_SHARED "Build sparrow pycapsule as a shared library" ON) diff --git a/include/sparrow-pycapsule/SparrowPythonClass.hpp b/include/sparrow-pycapsule/sparrow_array_python_class.hpp similarity index 100% rename from include/sparrow-pycapsule/SparrowPythonClass.hpp rename to include/sparrow-pycapsule/sparrow_array_python_class.hpp diff --git a/src/SparrowPythonClass.cpp b/src/sparrow_array_python_class.cpp similarity index 66% rename from src/SparrowPythonClass.cpp rename to src/sparrow_array_python_class.cpp index a796414..e5d07be 100644 --- a/src/SparrowPythonClass.cpp +++ b/src/sparrow_array_python_class.cpp @@ -1,4 +1,4 @@ -#include "sparrow-pycapsule/SparrowPythonClass.hpp" +#include "sparrow-pycapsule/sparrow_array_python_class.hpp" #include #include @@ -67,6 +67,81 @@ namespace sparrow::pycapsule return PyLong_FromSize_t(self->arr->size()); } + /** + * @brief Constructor for SparrowArray. + * + * Accepts an object implementing __arrow_c_array__ and imports it. 
+ */ + static PyObject* SparrowArray_new(PyTypeObject* type, PyObject* args, PyObject* kwargs) + { + static const char* kwlist[] = {"arrow_array", nullptr}; + PyObject* arrow_array = nullptr; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", const_cast(kwlist), &arrow_array)) + { + return nullptr; + } + + // Get __arrow_c_array__ method from the input object + PyObject* arrow_c_array_method = PyObject_GetAttrString(arrow_array, "__arrow_c_array__"); + if (arrow_c_array_method == nullptr) + { + PyErr_SetString( + PyExc_TypeError, + "Input object must implement __arrow_c_array__ (ArrowArrayExportable protocol)" + ); + return nullptr; + } + + // Call __arrow_c_array__() to get the capsules + PyObject* capsules = PyObject_CallNoArgs(arrow_c_array_method); + Py_DECREF(arrow_c_array_method); + + if (capsules == nullptr) + { + return nullptr; + } + + // Unpack the tuple (schema_capsule, array_capsule) + if (!PyTuple_Check(capsules) || PyTuple_Size(capsules) != 2) + { + Py_DECREF(capsules); + PyErr_SetString(PyExc_TypeError, "__arrow_c_array__ must return a tuple of 2 elements"); + return nullptr; + } + + PyObject* schema_capsule = PyTuple_GetItem(capsules, 0); + PyObject* array_capsule = PyTuple_GetItem(capsules, 1); + + try + { + sparrow::array arr = import_array_from_capsules(schema_capsule, array_capsule); + Py_DECREF(capsules); + + // Allocate the object + SparrowArrayObject* self = reinterpret_cast(type->tp_alloc(type, 0)); + if (self == nullptr) + { + return nullptr; + } + + self->arr = new sparrow::array(std::move(arr)); + return reinterpret_cast(self); + } + catch (const std::bad_alloc&) + { + Py_DECREF(capsules); + PyErr_NoMemory(); + return nullptr; + } + catch (const std::exception& e) + { + Py_DECREF(capsules); + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } + } + static PyMethodDef SparrowArray_methods[] = { {"__arrow_c_array__", reinterpret_cast(SparrowArray_arrow_c_array), @@ -99,12 +174,17 @@ namespace sparrow::pycapsule 
.tp_dealloc = reinterpret_cast(SparrowArray_dealloc), .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = PyDoc_STR( - "SparrowArray - Arrow array wrapper implementing __arrow_c_array__.\n\n" + "SparrowArray(arrow_array) - Arrow array wrapper implementing __arrow_c_array__.\n\n" "This class wraps a sparrow array and implements the Arrow PyCapsule\n" "Interface (ArrowArrayExportable protocol), allowing it to be passed\n" - "directly to libraries like Polars via pl.from_arrow()." + "directly to libraries like Polars via pl.from_arrow().\n\n" + "Parameters\n" + "----------\n" + "arrow_array : ArrowArrayExportable\n" + " An object implementing __arrow_c_array__ (e.g., PyArrow array)." ), .tp_methods = SparrowArray_methods, + .tp_new = SparrowArray_new, }; static bool type_initialized = false; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ed875e1..67139b3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -85,28 +85,23 @@ add_custom_target(run_tests_with_junit_report set_target_properties(run_tests_with_junit_report PROPERTIES FOLDER "Tests utilities") -# Polars integration test helper - Native Python Extension Module -# =============================================================== -# This builds a proper Python extension module (.cpython-*.so) that shares -# the same Python runtime as the interpreter, avoiding dual-runtime issues. 
+Python_add_library(test_sparrow_helper MODULE test_sparrow_helper_module.cpp) -Python_add_library(test_polars_helper MODULE test_polars_helper_module.cpp) - -target_link_libraries(test_polars_helper +target_link_libraries(test_sparrow_helper PRIVATE sparrow-pycapsule sparrow::sparrow ) -target_compile_features(test_polars_helper PRIVATE cxx_std_20) +target_compile_features(test_sparrow_helper PRIVATE cxx_std_20) if(MSVC) - target_compile_options(test_polars_helper PRIVATE /W4) + target_compile_options(test_sparrow_helper PRIVATE /W4) else() - target_compile_options(test_polars_helper PRIVATE -Wall -Wextra -Wpedantic) + target_compile_options(test_sparrow_helper PRIVATE -Wall -Wextra -Wpedantic) endif() -set_target_properties(test_polars_helper PROPERTIES +set_target_properties(test_sparrow_helper PROPERTIES FOLDER tests LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE} # Python modules must not have a debug suffix - Python won't find them @@ -120,29 +115,28 @@ find_package(Python COMPONENTS Interpreter QUIET) if(Python_Interpreter_FOUND) # Add a test that runs the Python integration script add_test( - NAME test_polars_integration - COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_polars_integration.py + NAME test_sparrow_integration + COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_sparrow_integration.py WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) # Set environment variables so Python can find the libraries # Use generator expressions to get the actual library paths from targets - set_tests_properties(test_polars_integration PROPERTIES - ENVIRONMENT "TEST_POLARS_HELPER_LIB_PATH=$;SPARROW_PYCAPSULE_LIB_PATH=$" + set_tests_properties(test_sparrow_integration PROPERTIES + ENVIRONMENT "TEST_SPARROW_HELPER_LIB_PATH=$;SPARROW_PYCAPSULE_LIB_PATH=$" TIMEOUT 300 - DEPENDS test_polars_helper + DEPENDS test_sparrow_helper ) - message(STATUS "Added Polars integration test (Python ${Python_VERSION})") + message(STATUS "Added 
sparrow integration test (Python ${Python_VERSION})") else() - message(WARNING "Python interpreter not found, skipping Polars integration test") + message(WARNING "Python interpreter not found, skipping sparrow integration test") endif() -# Custom target to run Polars tests directly (with better output) if(Python_Interpreter_FOUND) - add_custom_target(run_polars_tests_direct + add_custom_target(run_sparrow_tests_direct COMMAND ${CMAKE_COMMAND} -E echo "==================================" - COMMAND ${CMAKE_COMMAND} -E echo "Polars Integration Test Runner" + COMMAND ${CMAKE_COMMAND} -E echo "Sparrow Integration Test Runner" COMMAND ${CMAKE_COMMAND} -E echo "==================================" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Checking Python dependencies..." @@ -150,47 +144,47 @@ if(Python_Interpreter_FOUND) COMMAND ${Python_EXECUTABLE} -c "import pyarrow" || ${CMAKE_COMMAND} -E cmake_echo_color --red "ERROR: pyarrow not installed. Install with: pip install pyarrow" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Library paths:" - COMMAND ${CMAKE_COMMAND} -E echo " TEST_POLARS_HELPER_LIB_PATH=$" + COMMAND ${CMAKE_COMMAND} -E echo " TEST_SPARROW_HELPER_LIB_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E echo "Running tests..." 
COMMAND ${CMAKE_COMMAND} -E echo "" COMMAND ${CMAKE_COMMAND} -E env - "TEST_POLARS_HELPER_LIB_PATH=$" + "TEST_SPARROW_HELPER_LIB_PATH=$" "SPARROW_PYCAPSULE_LIB_PATH=$" - ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_polars_integration.py + ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_sparrow_integration.py COMMAND ${CMAKE_COMMAND} -E echo "" - DEPENDS test_polars_helper sparrow-pycapsule + DEPENDS test_sparrow_helper sparrow-pycapsule WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Running Polars integration tests directly" + COMMENT "Running Sparrow integration tests directly" USES_TERMINAL ) - set_target_properties(run_polars_tests_direct PROPERTIES FOLDER "Tests utilities") + set_target_properties(run_sparrow_tests_direct PROPERTIES FOLDER "Tests utilities") - # Custom target to check Polars dependencies - add_custom_target(check_polars_deps - COMMAND ${CMAKE_COMMAND} -E echo "Checking Polars integration test dependencies..." + # Custom target to check Sparrow dependencies + add_custom_target(check_sparrow_deps + COMMAND ${CMAKE_COMMAND} -E echo "Checking Sparrow integration test dependencies..." 
COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/check_deps.py COMMAND ${CMAKE_COMMAND} -E echo "Environment variables that will be set:" - COMMAND ${CMAKE_COMMAND} -E echo " TEST_POLARS_HELPER_LIB_PATH=$" + COMMAND ${CMAKE_COMMAND} -E echo " TEST_SPARROW_HELPER_LIB_PATH=$" COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB_PATH=$" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Checking Polars test dependencies" + COMMENT "Checking Sparrow test dependencies" USES_TERMINAL ) - set_target_properties(check_polars_deps PROPERTIES FOLDER "Tests utilities") + set_target_properties(check_sparrow_deps PROPERTIES FOLDER "Tests utilities") # Minimal library loading test for debugging segfaults add_custom_target(test_library_load COMMAND ${CMAKE_COMMAND} -E env - "TEST_POLARS_HELPER_LIB_PATH=$" + "TEST_SPARROW_HELPER_LIB_PATH=$" "SPARROW_PYCAPSULE_LIB_PATH=$" "PYTHONUNBUFFERED=1" ${Python_EXECUTABLE} -u ${CMAKE_CURRENT_SOURCE_DIR}/test_library_load.py DEPENDS - test_polars_helper + test_sparrow_helper sparrow-pycapsule WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "Testing library loading step-by-step" diff --git a/test/sparrow_helpers.py b/test/sparrow_helpers.py new file mode 100644 index 0000000..24d64b6 --- /dev/null +++ b/test/sparrow_helpers.py @@ -0,0 +1,72 @@ +""" +Python helper module for sparrow-pycapsule integration tests. + +This module provides helper functions that wrap the C++ SparrowArray class, +making it easy to create test arrays and perform roundtrip operations. +""" + +from __future__ import annotations + +import sys +import os +from pathlib import Path +from typing import Any, Protocol, Tuple + +class ArrowArrayExportable(Protocol): + """Protocol for objects implementing the Arrow PyCapsule Interface.""" + + def __arrow_c_array__( + self, requested_schema: Any = None + ) -> Tuple[Any, Any]: + """Export the array as Arrow PyCapsules. + + Returns + ------- + Tuple[Any, Any] + A tuple of (schema_capsule, array_capsule). 
+ """ + ... + + +class SparrowArrayType(ArrowArrayExportable, Protocol): + """Type definition for SparrowArray from C++ extension.""" + + def size(self) -> int: + """Get the number of elements in the array.""" + ... + + +def _setup_module_path() -> None: + """Add the build directory to Python path so we can import test_sparrow_helper.""" + # Check for environment variable first + helper_path = os.environ.get('TEST_SPARROW_HELPER_PATH') + if helper_path: + module_dir = Path(helper_path).parent + if module_dir.exists(): + sys.path.insert(0, str(module_dir)) + return + + # Try to find in build directory + test_dir = Path(__file__).parent + build_dirs = [ + test_dir.parent / "build" / "bin" / "Debug", + test_dir.parent / "build" / "bin" / "Release", + test_dir.parent / "build" / "bin", + ] + + for build_dir in build_dirs: + if build_dir.exists(): + sys.path.insert(0, str(build_dir)) + return + + raise ImportError( + "Could not find test_sparrow_helper module. " + "Build the project first or set TEST_SPARROW_HELPER_PATH." 
+ ) + + +# Set up module path and import the C++ module +_setup_module_path() + +# Import the native Python extension module that provides SparrowArray +from test_sparrow_helper import SparrowArray # noqa: E402 diff --git a/test/test_library_load.py b/test/test_library_load.py index b577365..0afc7c9 100644 --- a/test/test_library_load.py +++ b/test/test_library_load.py @@ -22,14 +22,14 @@ def main(): try: step(1, "Checking environment variables") - helper_path = os.environ.get('TEST_POLARS_HELPER_LIB_PATH') + helper_path = os.environ.get('TEST_SPARROW_HELPER_LIB_PATH') main_path = os.environ.get('SPARROW_PYCAPSULE_LIB_PATH') - print(f"TEST_POLARS_HELPER_LIB_PATH: {helper_path}") + print(f"TEST_SPARROW_HELPER_LIB_PATH: {helper_path}") print(f"SPARROW_PYCAPSULE_LIB_PATH: {main_path}") if not helper_path: - print("ERROR: TEST_POLARS_HELPER_LIB_PATH not set") + print("ERROR: TEST_SPARROW_HELPER_LIB_PATH not set") return 1 if not main_path: @@ -63,7 +63,7 @@ def main(): print(f"✗ Failed to load main library: {e}") return 1 - step(5, "Loading test_polars_helper library") + step(5, "Loading test_sparrow_helper library") try: helper_lib = ctypes.CDLL(str(helper_file)) print("✓ Helper library loaded successfully") @@ -71,18 +71,6 @@ def main(): print(f"✗ Failed to load helper library: {e}") return 1 - step(6, "Checking for init_python function") - try: - if hasattr(helper_lib, 'init_python'): - print("✓ init_python function found") - else: - print("✗ init_python function not found") - print(f"Available attributes: {dir(helper_lib)}") - return 1 - except Exception as e: - print(f"✗ Error checking for init_python: {e}") - return 1 - print("\n" + "="*60) print("✓ ALL STEPS COMPLETED SUCCESSFULLY") print("="*60) diff --git a/test/test_polars_helper.cpp b/test/test_polars_helper.cpp deleted file mode 100644 index a9ad5b5..0000000 --- a/test/test_polars_helper.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/** - * @file test_polars_helper.cpp - * @brief C++ helper library for Polars 
integration tests. - * - * This library provides C functions that can be called from Python via ctypes - * to test the bidirectional data exchange between Polars and sparrow using - * the sparrow::pycapsule interface. - */ - -#include "test_polars_helper.hpp" - -#include -#include -#include - -#include -#include -#include - -#include - -extern "C" -{ - int create_test_array_capsules(PyObject** schema_capsule_out, PyObject** array_capsule_out) - { - try - { - std::vector> values = { - sparrow::make_nullable(10, true), - sparrow::make_nullable(20, true), - sparrow::make_nullable(0, false), // null - sparrow::make_nullable(40, true), - sparrow::make_nullable(50, true) - }; - - sparrow::primitive_array prim_array(std::move(values)); - sparrow::array arr(std::move(prim_array)); - - auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); - - if (schema_capsule == nullptr || array_capsule == nullptr) - { - std::cerr << "Failed to create PyCapsules\n"; - Py_XDECREF(schema_capsule); - Py_XDECREF(array_capsule); - return -1; - } - - *schema_capsule_out = schema_capsule; - *array_capsule_out = array_capsule; - - return 0; - } - catch (const std::exception& e) - { - std::cerr << "Exception in create_test_array_capsules: " << e.what() << '\n'; - return -1; - } - } - - int roundtrip_array_capsules( - PyObject* schema_capsule_in, - PyObject* array_capsule_in, - PyObject** schema_capsule_out, - PyObject** array_capsule_out - ) - { - try - { - if (schema_capsule_in == nullptr || array_capsule_in == nullptr) - { - std::cerr << "Null input capsules\n"; - return -1; - } - - sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( - schema_capsule_in, - array_capsule_in - ); - - std::cout << "Roundtrip array size: " << arr.size() << '\n'; - - auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); - - if (schema_capsule == nullptr || array_capsule == nullptr) - { - std::cerr << "Failed to create output 
PyCapsules\n"; - Py_XDECREF(schema_capsule); - Py_XDECREF(array_capsule); - return -1; - } - - *schema_capsule_out = schema_capsule; - *array_capsule_out = array_capsule; - - return 0; - } - catch (const std::exception& e) - { - std::cerr << "Exception in roundtrip_array_capsules: " << e.what() << '\n'; - return -1; - } - } - - int verify_array_size_from_capsules(PyObject* schema_capsule, PyObject* array_capsule, size_t expected_size) - { - try - { - if (schema_capsule == nullptr || array_capsule == nullptr) - { - std::cerr << "Null capsules provided\n"; - return -1; - } - - sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( - schema_capsule, - array_capsule - ); - - if (arr.size() == expected_size) - { - std::cout << "Array size verified: " << arr.size() << '\n'; - return 0; - } - else - { - std::cerr << "Size mismatch: expected " << expected_size << ", got " << arr.size() << '\n'; - return -1; - } - } - catch (const std::exception& e) - { - std::cerr << "Exception in verify_array_size_from_capsules: " << e.what() << '\n'; - return -1; - } - } -} diff --git a/test/test_polars_helper.hpp b/test/test_polars_helper.hpp deleted file mode 100644 index 51473a9..0000000 --- a/test/test_polars_helper.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/** - * @file test_polars_helper.hpp - * @brief C++ helper library declarations for Polars integration tests. - * - * This header declares C functions that can be called from Python via ctypes - * to test the bidirectional data exchange between Polars and sparrow using - * the sparrow::pycapsule interface. - */ - -#ifndef SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP -#define SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP - -#include - -#include -#include - -extern "C" -{ - /** - * @brief Create a test array and return PyCapsules. - * - * Uses sparrow::pycapsule::export_array_to_capsules() to create the capsules. 
- * - * @param schema_capsule_out Output parameter for schema PyCapsule - * @param array_capsule_out Output parameter for array PyCapsule - * @return 0 on success, -1 on error - */ - SPARROW_PYCAPSULE_API int create_test_array_capsules(PyObject** schema_capsule_out, PyObject** array_capsule_out); - - /** - * @brief Import array from PyCapsules and return new PyCapsules. - * - * Uses sparrow::pycapsule::import_array_from_capsules() and - * sparrow::pycapsule::export_array_to_capsules(). - * - * @param schema_capsule_in Input schema PyCapsule - * @param array_capsule_in Input array PyCapsule - * @param schema_capsule_out Output schema PyCapsule - * @param array_capsule_out Output array PyCapsule - * @return 0 on success, -1 on error - */ - SPARROW_PYCAPSULE_API int roundtrip_array_capsules( - PyObject* schema_capsule_in, - PyObject* array_capsule_in, - PyObject** schema_capsule_out, - PyObject** array_capsule_out - ); - - /** - * @brief Verify that array imported from PyCapsules has the expected size. - * - * Uses sparrow::pycapsule::import_array_from_capsules(). - * - * @param schema_capsule Schema PyCapsule - * @param array_capsule Array PyCapsule - * @param expected_size Expected array size - * @return 0 if size matches, -1 otherwise - */ - SPARROW_PYCAPSULE_API int verify_array_size_from_capsules( - PyObject* schema_capsule, - PyObject* array_capsule, - size_t expected_size - ); -} - -#endif // SPARROW_PYCAPSULE_TEST_POLARS_HELPER_HPP diff --git a/test/test_polars_helper_module.cpp b/test/test_polars_helper_module.cpp deleted file mode 100644 index 4e8155c..0000000 --- a/test/test_polars_helper_module.cpp +++ /dev/null @@ -1,329 +0,0 @@ -/** - * @file test_polars_helper_module.cpp - * @brief Native Python extension module for Polars integration tests. - * - * This is a native Python extension module (not pybind11/nanobind) that tests - * the sparrow::pycapsule interface. 
Being a proper extension module, it shares - * the same Python runtime as the interpreter, avoiding the dual-runtime issues - * that occur with ctypes. - */ - -#define PY_SSIZE_T_CLEAN -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -/** - * Create a test array and return a SparrowArray object. - * - * Python signature: create_test_array() -> SparrowArray - */ -static PyObject* py_create_test_array(PyObject* self, PyObject* args) -{ - (void)self; - (void)args; - - try - { - // Create a test array with nullable integers - std::vector> values = { - sparrow::make_nullable(10, true), - sparrow::make_nullable(20, true), - sparrow::make_nullable(0, false), // null - sparrow::make_nullable(40, true), - sparrow::make_nullable(50, true) - }; - - sparrow::primitive_array prim_array(std::move(values)); - sparrow::array arr(std::move(prim_array)); - - // Return a SparrowArray object that implements __arrow_c_array__ - return sparrow::pycapsule::create_sparrow_array_object(std::move(arr)); - } - catch (const std::exception& e) - { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } -} - -/** - * Create a test array and return PyCapsules. 
- * - * Python signature: create_test_array_capsules() -> tuple[capsule, capsule] - */ -static PyObject* py_create_test_array_capsules(PyObject* self, PyObject* args) -{ - (void)self; - (void)args; - - try - { - // Create a test array with nullable integers - std::vector> values = { - sparrow::make_nullable(10, true), - sparrow::make_nullable(20, true), - sparrow::make_nullable(0, false), // null - sparrow::make_nullable(40, true), - sparrow::make_nullable(50, true) - }; - - sparrow::primitive_array prim_array(std::move(values)); - sparrow::array arr(std::move(prim_array)); - - auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); - - if (schema_capsule == nullptr || array_capsule == nullptr) - { - Py_XDECREF(schema_capsule); - Py_XDECREF(array_capsule); - PyErr_SetString(PyExc_RuntimeError, "Failed to create PyCapsules"); - return nullptr; - } - - PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule); - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); - return result; - } - catch (const std::exception& e) - { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } -} - -/** - * Import from an object implementing __arrow_c_array__ and return a SparrowArray. 
- * - * Python signature: roundtrip_array(arrow_array) -> SparrowArray - */ -static PyObject* py_roundtrip_array(PyObject* self, PyObject* args) -{ - (void)self; - - PyObject* arrow_array = nullptr; - - if (!PyArg_ParseTuple(args, "O", &arrow_array)) - { - return nullptr; - } - - // Get __arrow_c_array__ method from the input object - PyObject* arrow_c_array_method = PyObject_GetAttrString(arrow_array, "__arrow_c_array__"); - if (arrow_c_array_method == nullptr) - { - PyErr_SetString(PyExc_TypeError, "Object does not implement __arrow_c_array__"); - return nullptr; - } - - // Call __arrow_c_array__() to get (schema_capsule, array_capsule) - PyObject* capsules = PyObject_CallObject(arrow_c_array_method, nullptr); - Py_DECREF(arrow_c_array_method); - - if (capsules == nullptr) - { - return nullptr; - } - - if (!PyTuple_Check(capsules) || PyTuple_Size(capsules) != 2) - { - Py_DECREF(capsules); - PyErr_SetString(PyExc_TypeError, "__arrow_c_array__ must return a tuple of 2 capsules"); - return nullptr; - } - - PyObject* schema_capsule = PyTuple_GetItem(capsules, 0); - PyObject* array_capsule = PyTuple_GetItem(capsules, 1); - - try - { - // Import from PyCapsules using sparrow::pycapsule - sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( - schema_capsule, - array_capsule - ); - - Py_DECREF(capsules); - - std::cout << "Roundtrip array size: " << arr.size() << '\n'; - - // Return a SparrowArray object that implements __arrow_c_array__ - return sparrow::pycapsule::create_sparrow_array_object(std::move(arr)); - } - catch (const std::exception& e) - { - Py_DECREF(capsules); - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } -} - -/** - * Import array from PyCapsules and return new PyCapsules (roundtrip). 
- * - * Python signature: roundtrip_array_capsules(schema_capsule, array_capsule) -> tuple[capsule, capsule] - */ -static PyObject* py_roundtrip_array_capsules(PyObject* self, PyObject* args) -{ - (void)self; - - PyObject* schema_capsule_in = nullptr; - PyObject* array_capsule_in = nullptr; - - if (!PyArg_ParseTuple(args, "OO", &schema_capsule_in, &array_capsule_in)) - { - return nullptr; - } - - try - { - // Import from PyCapsules using sparrow::pycapsule - sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( - schema_capsule_in, - array_capsule_in - ); - - std::cout << "Roundtrip array size: " << arr.size() << '\n'; - - // Export back to PyCapsules - auto [schema_capsule, array_capsule] = sparrow::pycapsule::export_array_to_capsules(arr); - - if (schema_capsule == nullptr || array_capsule == nullptr) - { - Py_XDECREF(schema_capsule); - Py_XDECREF(array_capsule); - PyErr_SetString(PyExc_RuntimeError, "Failed to create output PyCapsules"); - return nullptr; - } - - // Return as a tuple - PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule); - Py_DECREF(schema_capsule); - Py_DECREF(array_capsule); - return result; - } - catch (const std::exception& e) - { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } -} - -/** - * Verify that array imported from PyCapsules has the expected size. 
- * - * Python signature: verify_array_size_from_capsules(schema_capsule, array_capsule, expected_size) -> bool - */ -static PyObject* py_verify_array_size_from_capsules(PyObject* self, PyObject* args) -{ - (void)self; - - PyObject* schema_capsule = nullptr; - PyObject* array_capsule = nullptr; - Py_ssize_t expected_size = 0; - - if (!PyArg_ParseTuple(args, "OOn", &schema_capsule, &array_capsule, &expected_size)) - { - return nullptr; - } - - try - { - // Import from PyCapsules using sparrow::pycapsule - sparrow::array arr = sparrow::pycapsule::import_array_from_capsules( - schema_capsule, - array_capsule - ); - - std::cout << "Array size verified: " << arr.size() << '\n'; - - if (static_cast(arr.size()) == expected_size) - { - Py_RETURN_TRUE; - } - else - { - std::cerr << "Size mismatch: expected " << expected_size << ", got " << arr.size() << '\n'; - Py_RETURN_FALSE; - } - } - catch (const std::exception& e) - { - PyErr_SetString(PyExc_RuntimeError, e.what()); - return nullptr; - } -} - -// Method definitions -static PyMethodDef TestPolarsHelperMethods[] = { - { - "create_test_array", - py_create_test_array, - METH_NOARGS, - "Create a test array and return a SparrowArray object implementing __arrow_c_array__." - }, - { - "create_test_array_capsules", - py_create_test_array_capsules, - METH_NOARGS, - "Create a test array and return (schema_capsule, array_capsule) tuple." - }, - { - "roundtrip_array", - py_roundtrip_array, - METH_VARARGS, - "Import from an object implementing __arrow_c_array__ and return a SparrowArray." - }, - { - "roundtrip_array_capsules", - py_roundtrip_array_capsules, - METH_VARARGS, - "Import array from capsules and export back to new capsules." - }, - { - "verify_array_size_from_capsules", - py_verify_array_size_from_capsules, - METH_VARARGS, - "Verify that array from capsules has the expected size." 
- }, - {nullptr, nullptr, 0, nullptr} // Sentinel -}; - -// Module definition -static struct PyModuleDef test_polars_helper_module = { - PyModuleDef_HEAD_INIT, - "test_polars_helper", // Module name - "Test helper module for sparrow-pycapsule Polars integration tests.\n" - "This module tests the sparrow::pycapsule interface for Arrow data exchange.", - -1, // Module state size (-1 = no state) - TestPolarsHelperMethods -}; - -// Module initialization function -PyMODINIT_FUNC PyInit_test_polars_helper(void) -{ - PyObject* module = PyModule_Create(&test_polars_helper_module); - if (module == nullptr) - { - return nullptr; - } - - // Register the SparrowArray type with this module - if (sparrow::pycapsule::register_sparrow_array_type(module) < 0) - { - Py_DECREF(module); - return nullptr; - } - - return module; -} diff --git a/test/test_sparrow_helper_module.cpp b/test/test_sparrow_helper_module.cpp new file mode 100644 index 0000000..178dade --- /dev/null +++ b/test/test_sparrow_helper_module.cpp @@ -0,0 +1,95 @@ +/** + * @file test_sparrow_helper_module.cpp + * @brief Native Python extension module for sparrow integration tests. + * + * This is a minimal native Python extension module that registers the + * SparrowArray type and provides a function to create test arrays. + * The higher-level helper logic is in sparrow_helpers.py. + */ + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include + +#include +#include +#include + +#include +#include + +/** + * Create a test array and return a SparrowArray object. 
+ * + * Python signature: create_test_array() -> SparrowArray + */ +static PyObject* py_create_test_array(PyObject* self, PyObject* args) +{ + (void)self; + (void)args; + + try + { + // Create a test array with nullable integers + std::vector> values = { + sparrow::make_nullable(10, true), + sparrow::make_nullable(20, true), + sparrow::make_nullable(0, false), // null + sparrow::make_nullable(40, true), + sparrow::make_nullable(50, true) + }; + + sparrow::primitive_array prim_array(std::move(values)); + sparrow::array arr(std::move(prim_array)); + + // Return a SparrowArray object that implements __arrow_c_array__ + return sparrow::pycapsule::create_sparrow_array_object(std::move(arr)); + } + catch (const std::exception& e) + { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} + +// Method definitions +static PyMethodDef TestSparrowHelperMethods[] = { + { + "create_test_array", + py_create_test_array, + METH_NOARGS, + "Create a test array and return a SparrowArray object implementing __arrow_c_array__." 
+ }, + {nullptr, nullptr, 0, nullptr} // Sentinel +}; + +// Module definition +static struct PyModuleDef test_sparrow_helper_module = { + PyModuleDef_HEAD_INIT, + "test_sparrow_helper", // Module name + "Native Python extension providing SparrowArray type for Arrow data exchange.\n" + "Higher-level helpers are available in sparrow_helpers.py.", + -1, // Module state size (-1 = no state) + TestSparrowHelperMethods +}; + +// Module initialization function +PyMODINIT_FUNC PyInit_test_sparrow_helper(void) +{ + PyObject* module = PyModule_Create(&test_sparrow_helper_module); + if (module == nullptr) + { + return nullptr; + } + + // Register the SparrowArray type with this module + if (sparrow::pycapsule::register_sparrow_array_type(module) < 0) + { + Py_DECREF(module); + return nullptr; + } + + return module; +} diff --git a/test/test_polars_integration.py b/test/test_sparrow_integration.py similarity index 51% rename from test/test_polars_integration.py rename to test/test_sparrow_integration.py index 574aeca..a249752 100644 --- a/test/test_polars_integration.py +++ b/test/test_sparrow_integration.py @@ -11,8 +11,6 @@ """ import sys -import os -from pathlib import Path import pytest import polars as pl @@ -21,61 +19,33 @@ from polars._utils.wrap import wrap_s -def setup_module_path(): - """Add the build directory to Python path so we can import test_polars_helper.""" - # Check for environment variable first - helper_path = os.environ.get('TEST_POLARS_HELPER_PATH') - if helper_path: - module_dir = Path(helper_path).parent - if module_dir.exists(): - sys.path.insert(0, str(module_dir)) - return +# Import helpers from our Python module +from sparrow_helpers import ( + ArrowArrayExportable, + SparrowArray, + SparrowArrayType, +) - # Try to find in build directory - test_dir = Path(__file__).parent - build_dirs = [ - test_dir.parent / "build" / "bin" / "Debug", - test_dir.parent / "build" / "bin" / "Release", - test_dir.parent / "build" / "bin", - ] +# Import the C++ module 
for create_test_array +import test_sparrow_helper # noqa: E402 - for build_dir in build_dirs: - if build_dir.exists(): - sys.path.insert(0, str(build_dir)) - return - raise ImportError( - "Could not find test_polars_helper module. " - "Build the project first or set TEST_POLARS_HELPER_PATH." - ) - - -# Set up module path before importing -setup_module_path() - -# Import the native Python extension module -import test_polars_helper # noqa: E402 - - -# ============================================================================= -# Helper function to convert ArrowArrayExportable to Polars Series -# ============================================================================= - - -def arrow_array_to_series(arrow_array, name: str = "") -> pl.Series: +def arrow_array_to_series( + arrow_array: ArrowArrayExportable, name: str = "" +) -> pl.Series: """ Convert an object implementing __arrow_c_array__ to a Polars Series. - + This function uses Polars' internal PySeries.from_arrow_c_array to create a Series directly from an Arrow array. - + Parameters ---------- arrow_array : ArrowArrayExportable An object that implements __arrow_c_array__ method. name : str, optional Name for the resulting Series. Default is empty string. 
- + Returns ------- pl.Series @@ -98,27 +68,49 @@ class TestSparrowToPolars: def test_create_sparrow_array(self): """Create a SparrowArray in C++ that implements __arrow_c_array__.""" - sparrow_array = test_polars_helper.create_test_array() - + sparrow_array = test_sparrow_helper.create_test_array() + assert sparrow_array is not None, "Received null SparrowArray from C++" - assert hasattr(sparrow_array, '__arrow_c_array__'), "SparrowArray missing __arrow_c_array__ method" + assert hasattr(sparrow_array, "__arrow_c_array__"), ( + "SparrowArray missing __arrow_c_array__ method" + ) assert sparrow_array.size() == 5, f"Expected size 5, got {sparrow_array.size()}" + def test_sparrow_array_type(self): + """Verify that created array is a sparrow.SparrowArray instance.""" + sparrow_array = test_sparrow_helper.create_test_array() + + # Check the type name + type_name = type(sparrow_array).__name__ + assert type_name == "SparrowArray", ( + f"Expected type 'SparrowArray', got '{type_name}'" + ) + + # Check the module-qualified name + full_name = f"{type(sparrow_array).__module__}.{type_name}" + assert full_name == "sparrow.SparrowArray", ( + f"Expected 'sparrow.SparrowArray', got '{full_name}'" + ) + def test_sparrow_to_polars_series(self): """Convert SparrowArray to Polars Series using the Arrow PyCapsule Interface.""" - sparrow_array = test_polars_helper.create_test_array() + sparrow_array = test_sparrow_helper.create_test_array() polars_series = arrow_array_to_series(sparrow_array) - assert polars_series.dtype == pl.Int32, f"Expected Int32, got {polars_series.dtype}" + assert polars_series.dtype == pl.Int32, ( + f"Expected Int32, got {polars_series.dtype}" + ) expected = [10, 20, None, 40, 50] actual = polars_series.to_list() - assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" + assert expected == actual, ( + f"Data mismatch! 
Expected: {expected}, Actual: {actual}" + ) def test_sparrow_to_polars_preserves_nulls(self): """Verify that null values from sparrow are preserved in Polars.""" - sparrow_array = test_polars_helper.create_test_array() + sparrow_array = test_sparrow_helper.create_test_array() polars_series = arrow_array_to_series(sparrow_array) - + # The test array has a null at index 2 values = polars_series.to_list() assert values[2] is None, "Null value not preserved at index 2" @@ -132,73 +124,62 @@ def test_sparrow_to_polars_preserves_nulls(self): class TestPyArrowToSparrow: """Test creating an array in PyArrow and importing to sparrow.""" - def test_pyarrow_to_sparrow_via_capsules(self): - """Import PyArrow array to sparrow using PyCapsules.""" + def test_create_sparrow_array_from_pyarrow(self): + """Create a SparrowArray directly from a PyArrow array using the constructor.""" # Create a PyArrow array pa_array = pa.array([100, 200, None, 400, 500], type=pa.int32()) - - # Export to PyCapsules using Arrow PyCapsule Interface - schema_capsule, array_capsule = pa_array.__arrow_c_array__() - - # Verify sparrow can import and read the data - result = test_polars_helper.verify_array_size_from_capsules( - schema_capsule, array_capsule, 5 - ) - assert result is True, "Sparrow failed to import PyArrow array" + + # Create SparrowArray directly using the type constructor + sparrow_array = SparrowArray(pa_array) + + # Verify it's a SparrowArray + assert type(sparrow_array).__name__ == "SparrowArray" + assert sparrow_array.size() == 5 + + # Verify we can convert it to Polars + polars_series = arrow_array_to_series(sparrow_array) + expected = [100, 200, None, 400, 500] + assert polars_series.to_list() == expected + + def test_pyarrow_to_sparrow(self): + """Import PyArrow array to sparrow using Arrow PyCapsule Interface.""" + # Create a PyArrow array + pa_array = pa.array([100, 200, None, 400, 500], type=pa.int32()) + + # Verify sparrow can import and read the data via __arrow_c_array__ + + 
sparrow_array: SparrowArrayType = SparrowArray(pa_array) + assert sparrow_array.size() == 5 def test_pyarrow_roundtrip_through_sparrow(self): """Round-trip: PyArrow → sparrow → Polars.""" # Create a PyArrow array pa_array = pa.array([1, 2, None, 4, 5], type=pa.int32()) - - # Export to PyCapsules - schema_capsule, array_capsule = pa_array.__arrow_c_array__() - - # Round-trip through sparrow (import then export) - schema_out, array_out = test_polars_helper.roundtrip_array_capsules( - schema_capsule, array_capsule - ) - - # Import the result into Polars using a wrapper - class CapsuleWrapper: - def __init__(self, schema, array): - self._schema = schema - self._array = array - def __arrow_c_array__(self, requested_schema=None): - return self._schema, self._array - - wrapper = CapsuleWrapper(schema_out, array_out) - result_series = arrow_array_to_series(wrapper) - + + # Round-trip through sparrow (import then export as SparrowArray) + sparrow_array = SparrowArray(pa_array) + + # Import the result into Polars + result_series = arrow_array_to_series(sparrow_array) + # Verify data matches expected = [1, 2, None, 4, 5] actual = result_series.to_list() - assert expected == actual, f"Data mismatch! Expected: {expected}, Actual: {actual}" + assert expected == actual, ( + f"Data mismatch! 
Expected: {expected}, Actual: {actual}" + ) def test_pyarrow_nulls_preserved_in_sparrow(self): """Verify that null values from PyArrow are preserved through sparrow.""" # Create a PyArrow array with nulls pa_array = pa.array([None, 1, None, 3, None], type=pa.int32()) - - # Export to PyCapsules - schema_capsule, array_capsule = pa_array.__arrow_c_array__() - + # Round-trip through sparrow - schema_out, array_out = test_polars_helper.roundtrip_array_capsules( - schema_capsule, array_capsule - ) - + sparrow_array = SparrowArray(pa_array) + # Import into Polars - class CapsuleWrapper: - def __init__(self, schema, array): - self._schema = schema - self._array = array - def __arrow_c_array__(self, requested_schema=None): - return self._schema, self._array - - wrapper = CapsuleWrapper(schema_out, array_out) - result_series = arrow_array_to_series(wrapper) - + result_series = arrow_array_to_series(sparrow_array) + # Check null positions values = result_series.to_list() assert values[0] is None, "Null not preserved at index 0" From 08002fbd9af808ad04234f893a81be34981ebe9d Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Wed, 26 Nov 2025 13:44:16 +0100 Subject: [PATCH 15/18] wip --- test/test_sparrow_helper_module.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/test_sparrow_helper_module.cpp b/test/test_sparrow_helper_module.cpp index 178dade..acb701e 100644 --- a/test/test_sparrow_helper_module.cpp +++ b/test/test_sparrow_helper_module.cpp @@ -1,12 +1,3 @@ -/** - * @file test_sparrow_helper_module.cpp - * @brief Native Python extension module for sparrow integration tests. - * - * This is a minimal native Python extension module that registers the - * SparrowArray type and provides a function to create test arrays. - * The higher-level helper logic is in sparrow_helpers.py. 
- */ - #define PY_SSIZE_T_CLEAN #include From 0978d6fd711da1cb63e6e272aa12d1076e6d26b1 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Wed, 26 Nov 2025 13:50:47 +0100 Subject: [PATCH 16/18] wip --- README.md | 139 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 88 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 0da23c8..fceb869 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ The Sparrow PyCapsule Interface - A C++ library for exchanging Apache Arrow data - Exporting sparrow arrays to Python as PyCapsules (Arrow C Data Interface) - Importing Arrow data from Python PyCapsules into sparrow arrays - Zero-copy data exchange with Python libraries like Polars, PyArrow, and pandas +- A `SparrowArray` Python class that implements the Arrow PyCapsule Interface ## Features @@ -17,6 +18,7 @@ The Sparrow PyCapsule Interface - A C++ library for exchanging Apache Arrow data - ✅ **Compatible with Polars, PyArrow, pandas** and other Arrow-based libraries - ✅ **Bidirectional** data flow (C++ ↔ Python) - ✅ **Type-safe** with proper ownership semantics +- ✅ **SparrowArray Python class** implementing `__arrow_c_array__` protocol ## Building @@ -53,54 +55,56 @@ ctest --output-on-failure ## Usage Example -### C++ Side: Exporting Data +### C++ Side: Creating a SparrowArray for Python ```cpp #include +#include #include // Create a sparrow array sparrow::array my_array = /* ... */; -// Export to PyCapsules for Python consumption -auto [schema_capsule, array_capsule] = - sparrow::pycapsule::export_array_to_capsules(my_array); +// Create a SparrowArray Python object that implements __arrow_c_array__ +PyObject* sparrow_array = sparrow::pycapsule::create_sparrow_array_object(std::move(my_array)); -// Pass capsules to Python (via Python C API, pybind11, etc.) +// Return to Python - it can be used directly with Polars, PyArrow, etc. 
``` -### Python Side: Consuming C++ Data +### Python Side: Using SparrowArray ```python +from test_sparrow_helper import SparrowArray import polars as pl import pyarrow as pa -# Receive capsules from C++ -# schema_capsule, array_capsule = get_from_cpp() +# Create SparrowArray from any Arrow-compatible object +pa_array = pa.array([1, 2, None, 4, 5], type=pa.int32()) +sparrow_array = SparrowArray(pa_array) -# Import into PyArrow -arrow_array = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) +# SparrowArray implements __arrow_c_array__, so it works with Polars +# Using Polars internal API for primitive arrays: +from polars._plr import PySeries +from polars._utils.wrap import wrap_s -# Convert to Polars -series = pl.from_arrow(arrow_array) +ps = PySeries.from_arrow_c_array(sparrow_array) +series = wrap_s(ps) +print(series) # shape: (5,), dtype: Int32 -# Use in Polars DataFrame -df = pl.DataFrame({"my_column": series}) +# Get array size +print(sparrow_array.size()) # 5 ``` ### Python Side: Exporting to C++ ```python -import polars as pl - -# Create Polars data -series = pl.Series([1, 2, None, 4, 5]) +import pyarrow as pa -# Convert to Arrow and export as capsules -arrow_array = series.to_arrow() -schema_capsule, array_capsule = arrow_array.__arrow_c_array__() +# Any object implementing __arrow_c_array__ can be imported by sparrow +arrow_array = pa.array([1, 2, None, 4, 5]) -# Pass to C++ +# The SparrowArray constructor accepts any ArrowArrayExportable +sparrow_array = SparrowArray(arrow_array) ``` ### C++ Side: Importing from Python @@ -108,7 +112,7 @@ schema_capsule, array_capsule = arrow_array.__arrow_c_array__() ```cpp #include -// Receive capsules from Python +// Receive capsules from Python (e.g., from __arrow_c_array__) PyObject* schema_capsule = /* ... */; PyObject* array_capsule = /* ... 
*/; @@ -121,6 +125,27 @@ sparrow::array imported_array = std::cout << "Array size: " << imported_array.size() << std::endl; ``` +## SparrowArray Python Class + +The `SparrowArray` class is a Python type implemented in C++ that: + +- **Wraps a sparrow array** and exposes it to Python +- **Implements `__arrow_c_array__`** (ArrowArrayExportable protocol) +- **Accepts any ArrowArrayExportable** in its constructor (PyArrow, Polars, etc.) +- **Provides a `size()` method** to get the number of elements + +```python +# Constructor accepts any object with __arrow_c_array__ +sparrow_array = SparrowArray(pyarrow_array) +sparrow_array = SparrowArray(another_sparrow_array) + +# Implements ArrowArrayExportable protocol +schema_capsule, array_capsule = sparrow_array.__arrow_c_array__() + +# Get array size +n = sparrow_array.size() +``` + ## Testing ### C++ Unit Tests @@ -130,13 +155,12 @@ cd build ./bin/Debug/test_sparrow_pycapsule_lib ``` -### Polars Integration Tests +### Integration Tests -Test bidirectional data exchange with Polars: +Test bidirectional data exchange with Polars and PyArrow: ```bash - -# Or with direct execution (better output) +# Run integration tests (recommended) cmake --build . --target run_polars_tests_direct # Check dependencies first @@ -153,35 +177,39 @@ The project provides several convenient CMake targets for testing: |--------|-------------| | `run_tests` | Run all C++ unit tests | | `run_tests_with_junit_report` | Run C++ tests with JUnit XML output | -| `run_polars_tests_direct` | Run Polars test directly (recommended, better output) | +| `run_polars_tests_direct` | Run integration tests (recommended) | | `check_polars_deps` | Check Python dependencies (polars, pyarrow) | +| `test_library_load` | Debug library loading issues | **Usage:** ```bash cd build -# Run Polars integration tests +# Run integration tests cmake --build . --target run_polars_tests_direct # Check dependencies first cmake --build . 
--target check_polars_deps ``` -### Debugging Test Failures +## API Reference -If you encounter segmentation faults or other issues: +### SparrowArray Python Class -```bash -cd build +```cpp +// Create a SparrowArray Python object from a sparrow::array +PyObject* create_sparrow_array_object(sparrow::array&& arr); -# Run minimal library loading test (step-by-step debugging) -cmake --build . --target test_library_load +// Create a SparrowArray from PyCapsules +PyObject* create_sparrow_array_object_from_capsules( + PyObject* schema_capsule, PyObject* array_capsule); -# Check that libraries exist and dependencies are correct -cmake --build . --target check_polars_deps -``` +// Register SparrowArray type with a Python module +int register_sparrow_array_type(PyObject* module); -## API Reference +// Get the SparrowArray type object +PyTypeObject* get_sparrow_array_type(); +``` ### Export Functions @@ -197,9 +225,6 @@ cmake --build . --target check_polars_deps ### Memory Management -- `release_arrow_schema_pycapsule(PyObject* capsule)` - PyCapsule destructor for schema -- `release_arrow_array_pycapsule(PyObject* capsule)` - PyCapsule destructor for array - All capsules have destructors that properly clean up Arrow structures. ## Supported Data Types @@ -216,27 +241,39 @@ All types support nullable values via the Arrow null bitmap. 
## Integration with Python Libraries ### Polars + ```python -series = pl.Series([1, 2, 3]) -arrow_array = series.to_arrow() -schema_capsule, array_capsule = arrow_array.__arrow_c_array__() -# Pass to C++ +from polars._plr import PySeries +from polars._utils.wrap import wrap_s + +# SparrowArray implements __arrow_c_array__, use Polars internal API +sparrow_array = SparrowArray(some_arrow_array) +ps = PySeries.from_arrow_c_array(sparrow_array) +series = wrap_s(ps) ``` ### PyArrow + ```python -arrow_array = pa.array([1, 2, 3]) -schema_capsule, array_capsule = arrow_array.__arrow_c_array__() -# Pass to C++ +import pyarrow as pa + +# Create SparrowArray from PyArrow +pa_array = pa.array([1, 2, 3]) +sparrow_array = SparrowArray(pa_array) + +# Export back to PyArrow +schema_capsule, array_capsule = sparrow_array.__arrow_c_array__() ``` ### pandas (via PyArrow) + ```python import pandas as pd +import pyarrow as pa + series = pd.Series([1, 2, 3]) arrow_array = pa.Array.from_pandas(series) -schema_capsule, array_capsule = arrow_array.__arrow_c_array__() -# Pass to C++ +sparrow_array = SparrowArray(arrow_array) ``` ## License From 85e06f685f5afb025c69e0e4b0e8e97509d0d64c Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 27 Nov 2025 09:15:26 +0100 Subject: [PATCH 17/18] remove unwanted files --- test/CMakeLists.txt | 31 -------------- test/check_deps.py | 21 ---------- test/test_library_load.py | 86 --------------------------------------- 3 files changed, 138 deletions(-) delete mode 100644 test/check_deps.py delete mode 100644 test/test_library_load.py diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 67139b3..50435b5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -161,35 +161,4 @@ if(Python_Interpreter_FOUND) ) set_target_properties(run_sparrow_tests_direct PROPERTIES FOLDER "Tests utilities") - - # Custom target to check Sparrow dependencies - add_custom_target(check_sparrow_deps - COMMAND ${CMAKE_COMMAND} -E echo "Checking Sparrow 
integration test dependencies..." - COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/check_deps.py - COMMAND ${CMAKE_COMMAND} -E echo "Environment variables that will be set:" - COMMAND ${CMAKE_COMMAND} -E echo " TEST_SPARROW_HELPER_LIB_PATH=$" - COMMAND ${CMAKE_COMMAND} -E echo " SPARROW_PYCAPSULE_LIB_PATH=$" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Checking Sparrow test dependencies" - USES_TERMINAL - ) - - set_target_properties(check_sparrow_deps PROPERTIES FOLDER "Tests utilities") - - # Minimal library loading test for debugging segfaults - add_custom_target(test_library_load - COMMAND ${CMAKE_COMMAND} -E env - "TEST_SPARROW_HELPER_LIB_PATH=$" - "SPARROW_PYCAPSULE_LIB_PATH=$" - "PYTHONUNBUFFERED=1" - ${Python_EXECUTABLE} -u ${CMAKE_CURRENT_SOURCE_DIR}/test_library_load.py - DEPENDS - test_sparrow_helper - sparrow-pycapsule - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Testing library loading step-by-step" - USES_TERMINAL - ) - - set_target_properties(test_library_load PROPERTIES FOLDER "Tests utilities") endif() diff --git a/test/check_deps.py b/test/check_deps.py deleted file mode 100644 index e082ecc..0000000 --- a/test/check_deps.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -"""Check if required Python dependencies are installed.""" - -import sys - -try: - import polars - print(f"polars version: {polars.__version__}") -except ImportError: - print("ERROR: polars not installed") - sys.exit(1) - -try: - import pyarrow as pa - print(f"pyarrow version: {pa.__version__}") -except ImportError: - print("ERROR: pyarrow not installed") - sys.exit(1) - -print("\nAll dependencies installed!") -sys.exit(0) diff --git a/test/test_library_load.py b/test/test_library_load.py deleted file mode 100644 index 0afc7c9..0000000 --- a/test/test_library_load.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 -""" -Minimal test to debug library loading issues. 
-This script tests each step individually to identify where segfaults occur. -""" - -import sys -import os -import ctypes -from pathlib import Path - -def step(num, description): - """Print a test step.""" - print(f"\n{'='*60}") - print(f"Step {num}: {description}") - print('='*60) - -def main(): - print("\n" + "="*60) - print("Library Loading Debug Test") - print("="*60) - - try: - step(1, "Checking environment variables") - helper_path = os.environ.get('TEST_SPARROW_HELPER_LIB_PATH') - main_path = os.environ.get('SPARROW_PYCAPSULE_LIB_PATH') - - print(f"TEST_SPARROW_HELPER_LIB_PATH: {helper_path}") - print(f"SPARROW_PYCAPSULE_LIB_PATH: {main_path}") - - if not helper_path: - print("ERROR: TEST_SPARROW_HELPER_LIB_PATH not set") - return 1 - - if not main_path: - print("ERROR: SPARROW_PYCAPSULE_LIB_PATH not set") - return 1 - - step(2, "Checking library files exist") - helper_file = Path(helper_path) - main_file = Path(main_path) - - print(f"Helper library exists: {helper_file.exists()} ({helper_file})") - print(f"Main library exists: {main_file.exists()} ({main_file})") - - if not helper_file.exists(): - print(f"ERROR: Helper library not found at {helper_file}") - return 1 - - if not main_file.exists(): - print(f"ERROR: Main library not found at {main_file}") - return 1 - - step(3, "Testing ctypes module") - print("ctypes imported successfully") - print(f"ctypes.CDLL: {ctypes.CDLL}") - - step(4, "Loading sparrow-pycapsule library") - try: - main_lib = ctypes.CDLL(str(main_file)) - print("✓ Main library loaded successfully") - except Exception as e: - print(f"✗ Failed to load main library: {e}") - return 1 - - step(5, "Loading test_sparrow_helper library") - try: - helper_lib = ctypes.CDLL(str(helper_file)) - print("✓ Helper library loaded successfully") - except Exception as e: - print(f"✗ Failed to load helper library: {e}") - return 1 - - print("\n" + "="*60) - print("✓ ALL STEPS COMPLETED SUCCESSFULLY") - print("="*60) - return 0 - - except Exception as e: - 
print(f"\n✗ EXCEPTION: {e}") - import traceback - traceback.print_exc() - return 1 - -if __name__ == "__main__": - sys.exit(main()) From 51a644d8087901a7065e63e221bf396e8bd52d34 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Thu, 27 Nov 2025 09:24:01 +0100 Subject: [PATCH 18/18] deactivate osx python tests --- .github/workflows/osx.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index e5951ac..41dbd32 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -55,10 +55,10 @@ jobs: working-directory: build run: cmake --build . --target run_tests_with_junit_report - - name: Run Sparrow integration tests - if: matrix.build_shared == 'ON' - working-directory: build - run: cmake --build . --target run_sparrow_tests_direct + # - name: Run Sparrow integration tests + # if: matrix.build_shared == 'ON' + # working-directory: build + # run: cmake --build . --target run_sparrow_tests_direct - name: Install working-directory: build