Skip to content

Commit a451a95

Browse files
authored
feat(python): Add copy_into() to CBufferView (#455)
This is the non-bitmap equivalent of #450, useful for the same purpose (concatenating one big data buffer from chunks).
1 parent 2f2450a commit a451a95

File tree

2 files changed

+114
-19
lines changed

2 files changed

+114
-19
lines changed

python/src/nanoarrow/_lib.pyx

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1948,32 +1948,43 @@ cdef class CBufferView:
19481948
else:
19491949
return self._iter_dispatch(offset, length)
19501950

1951+
def copy_into(self, dest, offset=0, length=None, dest_offset=0):
    """Copy a slice of this buffer view into a writable destination buffer.

    Parameters
    ----------
    dest : object
        Object exporting a writable, contiguous buffer (requested with
        ``PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS``).
    offset : int
        Index of the first element of this view to copy.
    length : int or None
        Number of elements to copy; ``None`` means ``self.n_elements``.
    dest_offset : int
        Position in ``dest`` at which to start writing, counted in
        elements of this view's ``item_size``.

    Returns
    -------
    int
        The number of bytes copied.

    Raises
    ------
    IndexError, ValueError
        Propagated from ``_check_copy_into_bounds()`` when the source
        slice, the destination item size, or the destination range is
        not acceptable.
    """
    if length is None:
        length = self.n_elements

    # Convert every Python-level argument to its C type *before* the
    # destination buffer is acquired. These conversions raise for
    # non-integer or out-of-range values, and raising while the Py_buffer
    # is held would leave the export on `dest` unreleased.
    cdef int64_t c_offset = offset
    cdef int64_t c_length = length
    cdef int64_t c_item_size = self.item_size
    cdef int64_t c_dest_offset = dest_offset

    cdef Py_buffer buffer
    PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS)

    # _check_copy_into_bounds() releases `buffer` itself before raising
    # its IndexError, so it is not released again here.
    self._check_copy_into_bounds(&buffer, c_offset, c_length, c_dest_offset, c_item_size)

    # All offsets below are in bytes; the destination is addressed as raw
    # bytes regardless of its own item size.
    cdef uint8_t* dest_uint8 = <uint8_t*>buffer.buf
    cdef int64_t dest_offset_bytes = c_dest_offset * c_item_size
    cdef int64_t src_offset_bytes = c_offset * c_item_size
    cdef int64_t bytes_to_copy = c_length * c_item_size

    memcpy(
        &(dest_uint8[dest_offset_bytes]),
        &(self._ptr.data.as_uint8[src_offset_bytes]),
        bytes_to_copy
    )

    PyBuffer_Release(&buffer)
    return bytes_to_copy
1977+
19511978
def unpack_bits_into(self, dest, offset=0, length=None, dest_offset=0):
19521979
if self._data_type != NANOARROW_TYPE_BOOL:
19531980
raise ValueError("Can't unpack non-boolean buffer")
19541981

19551982
if length is None:
19561983
length = self.n_elements
19571984

1958-
if offset < 0 or length < 0 or (offset + length) > self.n_elements:
1959-
raise IndexError(
1960-
f"offset {offset} and length {length} do not describe a valid slice "
1961-
f"of buffer with {self.n_elements} elements"
1962-
)
1963-
19641985
cdef Py_buffer buffer
19651986
PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS)
1966-
if buffer.itemsize != 1:
1967-
PyBuffer_Release(&buffer)
1968-
raise ValueError("Destination buffer has itemsize != 1")
1969-
1970-
if dest_offset < 0 or buffer.len < (dest_offset + length):
1971-
buffer_len = buffer.len
1972-
PyBuffer_Release(&buffer)
1973-
raise IndexError(
1974-
f"Can't unpack {length} elements into buffer of size {buffer_len} "
1975-
f"with dest_offset = {dest_offset}"
1976-
)
1987+
self._check_copy_into_bounds(&buffer, offset, length, dest_offset, 1)
19771988

19781989
ArrowBitsUnpackInt8(
19791990
self._ptr.data.as_uint8,
@@ -1983,6 +1994,7 @@ cdef class CBufferView:
19831994
)
19841995

19851996
PyBuffer_Release(&buffer)
1997+
return length
19861998

19871999
def unpack_bits(self, offset=0, length=None):
19882000
if length is None:
@@ -1994,6 +2006,42 @@ cdef class CBufferView:
19942006
out.advance(length)
19952007
return out.finish()
19962008

2009+
def copy(self, offset=0, length=None):
    """Return a freshly built buffer containing a copy of a slice of this view.

    ``offset`` and ``length`` are element counts; ``length=None`` selects
    ``self.n_elements``. The slice is validated by ``copy_into()``, which
    is where out-of-range requests raise. The result is whatever
    ``CBufferBuilder.finish()`` returns, built with this view's
    ``data_type_id``.
    """
    if length is None:
        length = self.n_elements

    cdef int64_t n_bytes = length * self.item_size

    # Reserve room first so that copy_into() can write straight into the
    # builder; advance() then records the bytes that were written.
    builder = CBufferBuilder().set_data_type(self.data_type_id)
    builder.reserve_bytes(n_bytes)
    self.copy_into(builder, offset, length)
    builder.advance(n_bytes)
    return builder.finish()
2019+
2020+
cdef _check_copy_into_bounds(self, Py_buffer* dest, int64_t offset, int64_t length,
                             int64_t dest_offset, int64_t dest_itemsize):
    # Validate a copy/unpack request against this view and an already
    # acquired destination buffer.
    #
    #   dest           destination Py_buffer obtained by the caller
    #   offset/length  source slice, in elements of this view
    #   dest_offset    start position in dest, in units of dest_itemsize
    #   dest_itemsize  bytes written per element
    #
    # Raises IndexError when the source slice or the destination range is
    # out of bounds, and ValueError when the destination item size is
    # neither 1 nor dest_itemsize.
    #
    # On EVERY failure path the destination buffer is released before the
    # exception is raised, so callers must not release it again after an
    # error. On success the buffer is left untouched.
    if offset < 0 or length < 0 or (offset + length) > self.n_elements:
        PyBuffer_Release(dest)
        raise IndexError(
            f"offset {offset} and length {length} do not describe a valid slice "
            f"of buffer with {self.n_elements} elements"
        )

    if dest.itemsize != 1 and dest.itemsize != dest_itemsize:
        # Release here as well: without it the export on the destination
        # object leaks whenever the item size does not match.
        PyBuffer_Release(dest)
        raise ValueError(
            "Destination buffer must have itemsize == 1 or "
            f"itemsize == {dest_itemsize}"
        )

    # dest.len is a byte count, so compare in bytes.
    cdef int64_t dest_offset_bytes = dest_offset * dest_itemsize
    cdef int64_t bytes_to_copy = dest_itemsize * length
    if dest_offset < 0 or dest.len < (dest_offset_bytes + bytes_to_copy):
        # Read the length before releasing; the Py_buffer fields should
        # not be relied on afterwards.
        buffer_len = dest.len
        PyBuffer_Release(dest)
        raise IndexError(
            f"Can't unpack {length} elements into buffer of size {buffer_len} "
            f"with dest_offset = {dest_offset}"
        )
2044+
19972045
def _iter_bitmap(self, int64_t offset, int64_t length):
19982046
cdef uint8_t item
19992047
cdef int64_t i

python/tests/test_c_buffer_view.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_buffer_view_bool_unpack():
8181

8282
# Check with non-zero destination offset
8383
out = bytearray([255] * 10)
84-
view.unpack_bits_into(out, dest_offset=2)
84+
assert view.unpack_bits_into(out, dest_offset=2) == 8
8585
assert list(out) == [255, 255, 1, 0, 0, 1, 0, 0, 0, 0]
8686

8787
# Check error requesting out-of-bounds dest_offset
@@ -105,7 +105,7 @@ def test_buffer_view_bool_unpack():
105105

106106
# Check errors from an output buffer with the wrong data type
107107
out = array("i", [0, 0, 0, 0])
108-
msg = "Destination buffer has itemsize != 1"
108+
msg = "Destination buffer must have itemsize == 1"
109109
with pytest.raises(ValueError, match=msg):
110110
view.unpack_bits_into(out)
111111

@@ -153,3 +153,50 @@ def test_buffer_view_non_bool():
153153

154154
# Check repr
155155
assert "1 2 3 5" in repr(view)
156+
157+
158+
def test_buffer_view_copy():
    """Exercise CBufferView.copy() and copy_into() on an int32 data buffer."""
    from array import array

    # Keep the array view in a local so it stays referenced while its
    # buffer view is in use.
    int32_view = na.c_array([1, 2, 3, 4], na.int32()).view()
    view = int32_view.buffer(1)

    # A full copy keeps the element count, data type, and values
    everything = view.copy()
    assert len(everything) == view.n_elements
    assert everything.data_type == "int32"
    assert list(everything) == [1, 2, 3, 4]

    # A partial copy starts at the requested offset
    tail = view.copy(1, 3)
    assert len(tail) == 3
    assert list(tail) == [2, 3, 4]

    # Copying at a non-zero destination offset reports bytes written
    # and leaves the leading elements alone
    out = array(view.format, [0, 0, 0, 0, 0, 0])
    assert view.copy_into(out, dest_offset=2) == 16
    assert list(out) == [0, 0, 1, 2, 3, 4]

    # A negative dest_offset is rejected
    with pytest.raises(IndexError, match="Can't unpack"):
        view.copy_into(out, dest_offset=-1)

    # Source slices outside of the buffer are rejected
    msg = "do not describe a valid slice"
    for bad_offset, bad_length in ((-1, None), (0, -1), (0, 9)):
        with pytest.raises(IndexError, match=msg):
            view.copy(bad_offset, bad_length)

    # A destination that is too short is rejected
    out = array("i")
    msg = "Can't unpack 4 elements into buffer of size 0"
    with pytest.raises(IndexError, match=msg):
        view.copy_into(out)

    # A destination whose item size is neither 1 nor 4 is rejected
    out = array("d", [0, 0, 0, 0])
    msg = "Destination buffer must have itemsize == 1 or itemsize == 4"
    with pytest.raises(ValueError, match=msg):
        view.copy_into(out)

0 commit comments

Comments
 (0)