Skip to content

Commit 0a0d345

Browse files
committed
wip
1 parent 58d532d commit 0a0d345

File tree

4 files changed

+549
-108
lines changed

4 files changed

+549
-108
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#pragma once
2+
3+
#define PY_SSIZE_T_CLEAN
4+
#include <Python.h>
5+
6+
#include <sparrow/array.hpp>
7+
8+
#include "sparrow-pycapsule/config/config.hpp"
9+
#include "sparrow-pycapsule/pycapsule.hpp"
10+
11+
namespace sparrow::pycapsule
12+
{
13+
/**
14+
* @brief Python object structure for SparrowArray.
15+
*
16+
* This structure holds a pointer to a sparrow::array. The pointer is used
17+
* to avoid issues with C++ objects in C-style Python object structures.
18+
*/
19+
struct SparrowArrayObject
20+
{
21+
PyObject_HEAD sparrow::array* arr;
22+
};
23+
24+
/**
25+
* @brief Deallocator for SparrowArray Python objects.
26+
*/
27+
SPARROW_PYCAPSULE_API void SparrowArray_dealloc(SparrowArrayObject* self);
28+
29+
/**
30+
* @brief Implementation of __arrow_c_array__ method.
31+
*
32+
* This method exports the wrapped sparrow array as Arrow PyCapsules,
33+
* implementing the Arrow PyCapsule Interface (ArrowArrayExportable protocol).
34+
*
35+
* @param self The SparrowArray object.
36+
* @param args Positional arguments (may carry the optional requested_schema).
37+
* @param kwargs Keyword arguments (optional requested_schema).
38+
* @return A tuple of (schema_capsule, array_capsule).
39+
*/
40+
SPARROW_PYCAPSULE_API PyObject*
41+
SparrowArray_arrow_c_array(SparrowArrayObject* self, PyObject* args, PyObject* kwargs);
42+
43+
/**
44+
* @brief Get the size of the wrapped array.
45+
*
46+
* @param self The SparrowArray object.
47+
* @param args Positional arguments (unused).
48+
* @return The size of the array as a Python integer.
49+
*/
50+
SPARROW_PYCAPSULE_API PyObject* SparrowArray_size(SparrowArrayObject* self, PyObject* args);
51+
52+
/**
53+
* @brief Get the Python type object for SparrowArray.
54+
*
55+
* This function returns a pointer to the SparrowArrayType. The type is
56+
* initialized on first call if necessary.
57+
*
58+
* @return Pointer to the SparrowArrayType, or nullptr on error.
59+
*/
60+
SPARROW_PYCAPSULE_API PyTypeObject* get_sparrow_array_type();
61+
62+
/**
63+
* @brief Create a new SparrowArray Python object from a sparrow::array.
64+
*
65+
* This function creates a new Python object that wraps the given sparrow array.
66+
* The array is moved into the Python object, so the caller should not use it
67+
* after this call.
68+
*
69+
* @param arr The sparrow array to wrap (will be moved).
70+
* @return A new reference to a SparrowArray Python object, or nullptr on error.
71+
*/
72+
SPARROW_PYCAPSULE_API PyObject* create_sparrow_array_object(sparrow::array&& arr);
73+
74+
/**
75+
* @brief Create a new SparrowArray Python object from PyCapsules.
76+
*
77+
* This function creates a new Python object by importing from existing
78+
* Arrow PyCapsules.
79+
*
80+
* @param schema_capsule The schema PyCapsule.
81+
* @param array_capsule The array PyCapsule.
82+
* @return A new reference to a SparrowArray Python object, or nullptr on error.
83+
*/
84+
SPARROW_PYCAPSULE_API PyObject*
85+
create_sparrow_array_object_from_capsules(PyObject* schema_capsule, PyObject* array_capsule);
86+
87+
/**
88+
* @brief Register the SparrowArray type with a Python module.
89+
*
90+
* This function adds the SparrowArray type to the given module.
91+
*
92+
* @param module The Python module to add the type to.
93+
* @return 0 on success, -1 on error.
94+
*/
95+
SPARROW_PYCAPSULE_API int register_sparrow_array_type(PyObject* module);
96+
97+
} // namespace sparrow::pycapsule

src/SparrowPythonClass.cpp

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#include "sparrow-pycapsule/SparrowPythonClass.hpp"
2+
3+
#include <new>
4+
#include <utility>
5+
6+
namespace sparrow::pycapsule
7+
{
8+
/**
 * @brief Deallocator for SparrowArray Python objects.
 *
 * Destroys the wrapped array, clears the pointer and hands the object
 * storage back through the type's tp_free slot.
 *
 * @param self The object being destroyed.
 */
void SparrowArray_dealloc(SparrowArrayObject* self)
{
    sparrow::array*& wrapped = self->arr;
    delete wrapped;
    wrapped = nullptr;

    PyTypeObject* const object_type = Py_TYPE(self);
    object_type->tp_free(reinterpret_cast<PyObject*>(self));
}
14+
15+
/**
 * @brief Implementation of the __arrow_c_array__ method.
 *
 * Parses the optional requested_schema argument (accepted positionally or
 * by keyword and then ignored), exports the wrapped array through
 * export_array_to_capsules() and packs the two capsules into a tuple.
 *
 * No C++ exception is allowed to leave this function: it is invoked by the
 * Python interpreter, so every failure is reported as a Python exception.
 *
 * @param self The SparrowArray object.
 * @param args Positional arguments (may carry requested_schema).
 * @param kwargs Keyword arguments (may carry requested_schema).
 * @return A new reference to a (schema_capsule, array_capsule) tuple, or
 *         nullptr with a Python exception set on failure.
 */
PyObject* SparrowArray_arrow_c_array(SparrowArrayObject* self, PyObject* args, PyObject* kwargs)
{
    static const char* kwlist[] = {"requested_schema", nullptr};
    PyObject* requested_schema = nullptr;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", const_cast<char**>(kwlist), &requested_schema))
    {
        return nullptr;
    }

    // requested_schema is accepted for protocol compatibility but is not
    // acted upon: the array is always exported with its own schema.
    (void) requested_schema;

    if (self->arr == nullptr)
    {
        PyErr_SetString(PyExc_ValueError, "SparrowArray contains no data");
        return nullptr;
    }

    try
    {
        auto [schema_capsule, array_capsule] = export_array_to_capsules(*self->arr);

        if (schema_capsule == nullptr || array_capsule == nullptr)
        {
            Py_XDECREF(schema_capsule);
            Py_XDECREF(array_capsule);
            // Keep a more specific error if the export already raised one;
            // only fall back to the generic message when nothing is pending.
            if (PyErr_Occurred() == nullptr)
            {
                PyErr_SetString(PyExc_RuntimeError, "Failed to create Arrow PyCapsules");
            }
            return nullptr;
        }

        // PyTuple_Pack takes its own references, so ours are dropped
        // whether or not the tuple could be created.
        PyObject* result = PyTuple_Pack(2, schema_capsule, array_capsule);
        Py_DECREF(schema_capsule);
        Py_DECREF(array_capsule);
        return result;
    }
    catch (const std::exception& e)
    {
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return nullptr;
    }
    catch (...)
    {
        // Exceptions not derived from std::exception must not unwind into
        // the interpreter's C frames either.
        PyErr_SetString(PyExc_RuntimeError, "Unknown C++ exception while exporting Arrow PyCapsules");
        return nullptr;
    }
}
58+
59+
/**
 * @brief Return the number of elements of the wrapped array.
 *
 * @param self The SparrowArray object.
 * @param args Unused.
 * @return A new Python integer holding the size, or nullptr with a
 *         ValueError set when the object wraps no array.
 */
PyObject* SparrowArray_size(SparrowArrayObject* self, [[maybe_unused]] PyObject* args)
{
    sparrow::array* const wrapped = self->arr;
    if (wrapped != nullptr)
    {
        return PyLong_FromSize_t(wrapped->size());
    }

    PyErr_SetString(PyExc_ValueError, "SparrowArray contains no data");
    return nullptr;
}
69+
70+
// Docstring attached to SparrowArray.__arrow_c_array__.
static const char SparrowArray_arrow_c_array_doc[] =
    "Export the array via the Arrow PyCapsule interface.\n\n"
    "Parameters\n"
    "----------\n"
    "requested_schema : object, optional\n"
    " Requested schema for the output (typically ignored).\n\n"
    "Returns\n"
    "-------\n"
    "tuple[object, object]\n"
    " A tuple of (schema_capsule, array_capsule).";

// Docstring attached to SparrowArray.size.
static const char SparrowArray_size_doc[] =
    "Get the number of elements in the array.\n\n"
    "Returns\n"
    "-------\n"
    "int\n"
    " The size of the array.";

// Method table of the SparrowArray type; the all-null entry terminates it.
static PyMethodDef SparrowArray_methods[] = {
    {
        "__arrow_c_array__",
        reinterpret_cast<PyCFunction>(SparrowArray_arrow_c_array),
        METH_VARARGS | METH_KEYWORDS,
        SparrowArray_arrow_c_array_doc,
    },
    {
        "size",
        reinterpret_cast<PyCFunction>(SparrowArray_size),
        METH_NOARGS,
        SparrowArray_size_doc,
    },
    {nullptr, nullptr, 0, nullptr},  // Sentinel
};
93+
94+
// The type object - defined as a static variable
95+
static PyTypeObject SparrowArrayType = {
96+
.ob_base = PyVarObject_HEAD_INIT(nullptr, 0).tp_name = "sparrow.SparrowArray",
97+
.tp_basicsize = sizeof(SparrowArrayObject),
98+
.tp_itemsize = 0,
99+
.tp_dealloc = reinterpret_cast<destructor>(SparrowArray_dealloc),
100+
.tp_flags = Py_TPFLAGS_DEFAULT,
101+
.tp_doc = PyDoc_STR(
102+
"SparrowArray - Arrow array wrapper implementing __arrow_c_array__.\n\n"
103+
"This class wraps a sparrow array and implements the Arrow PyCapsule\n"
104+
"Interface (ArrowArrayExportable protocol), allowing it to be passed\n"
105+
"directly to libraries like Polars via pl.from_arrow()."
106+
),
107+
.tp_methods = SparrowArray_methods,
108+
};
109+
110+
static bool type_initialized = false;
111+
112+
PyTypeObject* get_sparrow_array_type()
113+
{
114+
if (!type_initialized)
115+
{
116+
if (PyType_Ready(&SparrowArrayType) < 0)
117+
{
118+
return nullptr;
119+
}
120+
type_initialized = true;
121+
}
122+
return &SparrowArrayType;
123+
}
124+
125+
/**
 * @brief Create a new SparrowArray Python object that takes over a sparrow array.
 *
 * The array is moved into a heap allocation owned by the new Python object.
 * No C++ exception leaves this function; failures are reported as Python
 * exceptions.
 *
 * @param arr The sparrow array to wrap (moved from on success).
 * @return A new reference to a SparrowArray object, or nullptr with a
 *         Python exception set on failure.
 */
PyObject* create_sparrow_array_object(sparrow::array&& arr)
{
    PyTypeObject* type = get_sparrow_array_type();
    if (type == nullptr)
    {
        return nullptr;
    }

    SparrowArrayObject* obj = PyObject_New(SparrowArrayObject, type);
    if (obj == nullptr)
    {
        return nullptr;
    }

    // PyObject_New only initializes the object header. The deallocator
    // deletes arr, so it must hold a well-defined value before any
    // Py_DECREF below can run it (otherwise a failed allocation would end
    // up deleting an indeterminate pointer).
    obj->arr = nullptr;

    try
    {
        obj->arr = new sparrow::array(std::move(arr));
    }
    catch (const std::bad_alloc&)
    {
        Py_DECREF(obj);
        PyErr_NoMemory();
        return nullptr;
    }
    catch (const std::exception& e)
    {
        Py_DECREF(obj);
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return nullptr;
    }
    catch (...)
    {
        // Exceptions not derived from std::exception must not unwind into
        // Python's C frames either.
        Py_DECREF(obj);
        PyErr_SetString(PyExc_RuntimeError, "Unknown C++ exception while creating SparrowArray");
        return nullptr;
    }

    return reinterpret_cast<PyObject*>(obj);
}
158+
159+
/**
 * @brief Build a SparrowArray Python object from a pair of Arrow PyCapsules.
 *
 * Imports the array with import_array_from_capsules() and wraps the result
 * with create_sparrow_array_object(). A std::exception raised on the way is
 * converted into a Python RuntimeError.
 *
 * @param schema_capsule The schema PyCapsule.
 * @param array_capsule The array PyCapsule.
 * @return A new reference to a SparrowArray object, or nullptr on error.
 */
PyObject* create_sparrow_array_object_from_capsules(PyObject* schema_capsule, PyObject* array_capsule)
{
    PyObject* wrapped = nullptr;
    try
    {
        sparrow::array imported = import_array_from_capsules(schema_capsule, array_capsule);
        wrapped = create_sparrow_array_object(std::move(imported));
    }
    catch (const std::exception& error)
    {
        PyErr_SetString(PyExc_RuntimeError, error.what());
        wrapped = nullptr;
    }
    return wrapped;
}
172+
173+
/**
 * @brief Add the SparrowArray type to a module under the name "SparrowArray".
 *
 * @param module The Python module receiving the type.
 * @return 0 on success, -1 on error.
 */
int register_sparrow_array_type(PyObject* module)
{
    PyTypeObject* const array_type = get_sparrow_array_type();
    if (array_type == nullptr)
    {
        return -1;
    }

    // PyModule_AddObject takes over one reference, but only when it
    // succeeds, so the extra reference is released again on failure.
    Py_INCREF(array_type);
    const int add_status = PyModule_AddObject(module, "SparrowArray", reinterpret_cast<PyObject*>(array_type));
    if (add_status >= 0)
    {
        return 0;
    }

    Py_DECREF(array_type);
    return -1;
}
190+
191+
} // namespace sparrow::pycapsule

0 commit comments

Comments
 (0)