Skip to content

Commit 9237ba1

Browse files
authored
Revert Data fields to bytes and add get_data_as_view for zero-copy access (#390)
* get data field with view * refine tc * refine based on flake check * run black again * rebase upstream master * add comment to tc * refine raise exception
1 parent 9754258 commit 9237ba1

File tree

4 files changed

+265
-7
lines changed

4 files changed

+265
-7
lines changed

capnp/lib/capnp.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ cdef class _DynamicStructReader:
6565
cpdef _which_str(self)
6666
cpdef _get_by_field(self, _StructSchemaField field)
6767
cpdef _has_by_field(self, _StructSchemaField field)
68+
cpdef get_data_as_view(self, field)
6869

6970
cpdef as_builder(self, num_first_segment_words=?, allocate_seg_callable=?)
7071

@@ -97,6 +98,7 @@ cdef class _DynamicStructBuilder:
9798
cpdef _which_str(self)
9899
cpdef adopt(self, field, _DynamicOrphan orphan)
99100
cpdef disown(self, field)
101+
cpdef get_data_as_view(self, field)
100102

101103
cpdef as_reader(self)
102104
cpdef copy(self, num_first_segment_words=?, allocate_seg_callable=?)

capnp/lib/capnp.pyx

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ from capnp.includes.schema_cpp cimport (MessageReader,)
1515

1616
from builtins import memoryview as BuiltinsMemoryview
1717
from cpython cimport array, Py_buffer, PyObject_CheckBuffer
18-
from cpython.buffer cimport PyBUF_SIMPLE, PyBUF_WRITABLE, PyBUF_WRITE, PyBUF_READ, PyBUF_CONTIG_RO
19-
from cpython.memoryview cimport PyMemoryView_FromMemory
18+
from cpython.buffer cimport PyBUF_SIMPLE, PyBUF_WRITABLE, PyBUF_WRITE, PyBUF_READ, PyBUF_CONTIG_RO, PyBuffer_FillInfo
19+
from cpython.memoryview cimport PyMemoryView_FromMemory, PyMemoryView_FromBuffer
2020
from cpython.bytes cimport PyBytes_FromStringAndSize
2121
from cpython.exc cimport PyErr_Clear
2222
from cython.operator cimport dereference as deref
@@ -669,7 +669,7 @@ cdef to_python_reader(C_DynamicValue.Reader self, object parent):
669669
return (<char*>temp_text.begin())[:temp_text.size()]
670670
elif type == capnp.TYPE_DATA:
671671
temp_data = self.asData()
672-
return PyMemoryView_FromMemory(<char *> temp_data.begin(), temp_data.size(), PyBUF_READ)
672+
return <bytes>((<char*>temp_data.begin())[:temp_data.size()])
673673
elif type == capnp.TYPE_LIST:
674674
return _DynamicListReader()._init(self.asList(), parent)
675675
elif type == capnp.TYPE_STRUCT:
@@ -703,7 +703,7 @@ cdef to_python_builder(C_DynamicValue.Builder self, object parent):
703703
return (<char*>temp_text.begin())[:temp_text.size()]
704704
elif type == capnp.TYPE_DATA:
705705
temp_data = self.asData()
706-
return PyMemoryView_FromMemory(<char *> temp_data.begin(), temp_data.size(), PyBUF_WRITE)
706+
return <bytes>((<char*>temp_data.begin())[:temp_data.size()])
707707
elif type == capnp.TYPE_LIST:
708708
return _DynamicListBuilder()._init(self.asList(), parent)
709709
elif type == capnp.TYPE_STRUCT:
@@ -1226,6 +1226,29 @@ cdef class _DynamicStructReader:
12261226
cpdef _has_by_field(self, _StructSchemaField field):
12271227
return self.thisptr.hasByField(field.thisptr)
12281228

1229+
cpdef get_data_as_view(self, field):
    """
    Return a read-only memoryview over a DATA field without copying.

    The returned view aliases the bytes held by the underlying message
    instead of duplicating them, so it is cheap for large payloads.

    :param field: Name of the field to expose.
    :return: A read-only ``memoryview`` over the field's bytes.
    :raises TypeError: If ``field`` exists but is not a DATA field.
    :raises Exception: Whatever ``KjException._to_python()`` maps a failed
        field lookup to.
    """
    cdef C_DynamicValue.Reader val
    cdef capnp.Data.Reader temp_data
    cdef Py_buffer buf

    try:
        val = self.thisptr.get(field)
    except KjException as e:
        # Surface the native error as its Python equivalent, without the
        # internal KjException in the traceback chain.
        raise e._to_python() from None

    if val.getType() != capnp.TYPE_DATA:
        raise TypeError("Field '{}' is not a DATA field".format(field))

    temp_data = val.asData()

    # PyMemoryView_FromBuffer refuses a NULL buffer pointer. Hand back an
    # empty read-only view in that case instead of an opaque ValueError.
    # NOTE(review): presumably capnp yields NULL only for an unset/empty
    # field -- confirm against the capnp Data::Reader implementation.
    if <void*>temp_data.begin() == NULL:
        return BuiltinsMemoryview(b"")

    # `self` is recorded as the exporting object of the buffer.
    # NOTE(review): PyMemoryView_FromBuffer appears not to take over the
    # reference PyBuffer_FillInfo adds to `self`; if so, each call pins the
    # struct permanently. A dedicated exporter type implementing
    # __getbuffer__ would give the view real ownership -- TODO confirm.
    if PyBuffer_FillInfo(&buf, self, <void*>temp_data.begin(), temp_data.size(), 1, PyBUF_CONTIG_RO) < 0:
        raise KjException("Failed to create buffer info")
    return PyMemoryView_FromBuffer(&buf)
1251+
12291252
cpdef _which_str(self):
12301253
try:
12311254
return <char *>helpers.fixMaybe(self.thisptr.which()).getProto().getName().cStr()
@@ -1628,6 +1651,32 @@ cdef class _DynamicStructBuilder:
16281651
"""
16291652
return _DynamicOrphan()._init(self.thisptr.disown(field), self._parent)
16301653

1654+
cpdef get_data_as_view(self, field):
    """
    Return a writable memoryview over a DATA field without copying.

    Writes through the view modify the message's own bytes in place:
        msg.get_data_as_view('myField')[0] = 0xFF

    :param field: Name of the field to expose.
    :return: A writable ``memoryview`` over the field's bytes.
    :raises TypeError: If ``field`` exists but is not a DATA field.
    :raises Exception: Whatever ``KjException._to_python()`` maps a failed
        field lookup to.
    """
    cdef C_DynamicValue.Builder val
    cdef capnp.Data.Builder temp_data
    cdef Py_buffer buf

    try:
        val = self.thisptr.get(field)
    except KjException as e:
        # Surface the native error as its Python equivalent, without the
        # internal KjException in the traceback chain.
        raise e._to_python() from None

    if val.getType() != capnp.TYPE_DATA:
        raise TypeError("Field '{}' is not a DATA field".format(field))

    temp_data = val.asData()

    # PyMemoryView_FromBuffer refuses a NULL buffer pointer. Hand back an
    # empty writable view in that case instead of an opaque ValueError.
    # NOTE(review): presumably capnp yields NULL only for an unset/empty
    # field -- confirm against the capnp Data::Builder implementation.
    if <void*>temp_data.begin() == NULL:
        return BuiltinsMemoryview(bytearray())

    # `self` is recorded as the exporting object of the buffer.
    # NOTE(review): PyMemoryView_FromBuffer appears not to take over the
    # reference PyBuffer_FillInfo adds to `self`; if so, each call pins the
    # struct permanently. A dedicated exporter type implementing
    # __getbuffer__ would give the view real ownership -- TODO confirm.
    if PyBuffer_FillInfo(&buf, self, <void*>temp_data.begin(), temp_data.size(), 0, PyBUF_WRITABLE) < 0:
        raise KjException("Failed to create buffer info")
    return PyMemoryView_FromBuffer(&buf)
1679+
16311680
cpdef as_reader(self):
16321681
"""A method for casting this Builder to a Reader
16331682

test/test_get_data_view.py

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
import os
2+
import pytest
3+
import capnp
4+
import sys
5+
import gc
6+
7+
8+
@pytest.fixture(scope="module")
def all_types():
    """Provide the ``all_types.capnp`` schema that sits next to this test file."""
    schema_path = os.path.join(os.path.dirname(__file__), "all_types.capnp")
    return capnp.load(schema_path)
13+
14+
15+
def test_set_bytes_get_bytes(all_types):
    """
    Scenario 1: bytes in, bytes out.

    Plain attribute access on a Data field must return a ``bytes`` object
    equal to the value that was assigned.
    """
    payload = b"hello_world"
    message = all_types.TestAllTypes.new_message()

    # Write, then read back through ordinary attribute access.
    message.dataField = payload
    fetched = message.dataField

    assert isinstance(fetched, bytes)
    assert fetched == payload
32+
33+
34+
def test_set_view_get_bytes(all_types):
    """
    Scenario 2: memoryview in, bytes out.

    Assigning a memoryview to a Data field is accepted, while ordinary
    attribute access still returns a ``bytes`` value.
    """
    message = all_types.TestAllTypes.new_message()

    # The view wraps a mutable bytearray source.
    source_view = memoryview(bytearray(b"view_source"))
    message.dataField = source_view

    fetched = message.dataField

    assert isinstance(fetched, bytes)
    assert fetched == b"view_source"
55+
56+
57+
def test_set_bytes_get_view_and_modify(all_types):
    """
    Scenario 3: bytes in, view out.

    ``get_data_as_view`` on a builder must return a writable memoryview,
    and writes through it must show up in the message itself.
    """
    message = all_types.TestAllTypes.new_message()
    message.dataField = b"ABCDE"

    data_view = message.get_data_as_view("dataField")

    # The view is a real, writable memoryview exposing the stored bytes.
    assert isinstance(data_view, memoryview)
    assert data_view.readonly is False
    assert data_view.tobytes() == b"ABCDE"

    # Overwrite the leading 'A' with 'Z' through the view...
    data_view[0] = ord("Z")

    # ...and observe the change through ordinary attribute access.
    assert message.dataField == b"ZBCDE"
81+
82+
83+
def test_reader_vs_builder_view(all_types):
    """
    Builder views must be writable; Reader views must be read-only.
    """
    # Builder side: obtain a view and mutate the message through it.
    builder = all_types.TestAllTypes.new_message()
    builder.dataField = b"test_rw"

    writable_view = builder.get_data_as_view("dataField")
    assert writable_view.readonly is False
    writable_view[0] = ord("T")

    # Reader side: the mutation is visible through ordinary attribute access.
    reader = builder.as_reader()
    assert reader.dataField == b"Test_rw"

    frozen_view = reader.get_data_as_view("dataField")
    assert isinstance(frozen_view, memoryview)
    assert frozen_view.readonly is True

    # Item assignment on the Reader's view is expected to raise TypeError.
    with pytest.raises(TypeError):
        frozen_view[0] = ord("X")
109+
110+
111+
def test_nested_struct_data(all_types):
    """
    ``get_data_as_view`` must also work on a struct nested inside a message.
    """
    message = all_types.TestAllTypes.new_message()

    # Populate the nested struct.
    nested = message.init("structField")
    nested.int32Field = 100
    nested.dataField = b"nested_data"

    # Ordinary attribute access sees the nested payload.
    assert message.structField.dataField == b"nested_data"

    # So does a view taken from the nested struct.
    nested_view = message.structField.get_data_as_view("dataField")
    assert isinstance(nested_view, memoryview)
    assert nested_view.tobytes() == b"nested_data"

    # Writing through the view changes the nested field in place.
    nested_view[0] = ord("N")
    assert message.structField.dataField == b"Nested_data"
134+
135+
136+
def test_corner_cases_values(all_types):
    """
    Edge cases: an empty payload, and binary data containing NUL bytes.
    """
    message = all_types.TestAllTypes.new_message()

    # Empty payload: both access paths report zero bytes.
    message.dataField = b""
    assert message.dataField == b""
    empty_view = message.get_data_as_view("dataField")
    assert len(empty_view) == 0

    # Embedded NUL bytes must survive both access paths untouched.
    with_nulls = b"\x00\xff\x00\x01"
    message.dataField = with_nulls
    assert message.dataField == with_nulls
    assert message.get_data_as_view("dataField").tobytes() == with_nulls
153+
154+
155+
def test_error_wrong_type(all_types):
    """
    ``get_data_as_view`` must reject fields that are not of type Data.
    """
    message = all_types.TestAllTypes.new_message()
    message.int32Field = 123
    message.textField = "I am text"

    # An integer field first, then a text field: both must be refused with
    # the same TypeError message.
    for non_data_field in ("int32Field", "textField"):
        with pytest.raises(TypeError) as excinfo:
            message.get_data_as_view(non_data_field)
        assert "not a DATA field" in str(excinfo.value)
172+
173+
174+
def test_error_missing_field(all_types):
    """
    Asking for a field name that does not exist must raise AttributeError.
    """
    message = all_types.TestAllTypes.new_message()
    unknown_name = "non_existent_field"

    with pytest.raises(AttributeError) as excinfo:
        message.get_data_as_view(unknown_name)

    # The error text should mention the offending field name.
    assert "non_existent_field" in str(excinfo.value)
186+
187+
188+
def test_view_keeps_message_alive(all_types):
    """
    A view must stay readable after the last named reference to its
    message has been dropped.
    """
    payload = b"persistence_check"
    message = all_types.TestAllTypes.new_message()
    message.dataField = payload

    # Sample the reference count immediately before and after taking a view.
    initial_ref_count = sys.getrefcount(message)
    held_view = message.get_data_as_view("dataField")
    new_ref_count = sys.getrefcount(message)

    assert (
        new_ref_count > initial_ref_count
    ), f"View failed to hold reference to Message! (Old: {initial_ref_count}, New: {new_ref_count})"
    print(
        f"\n[Ref Check] Success: Ref count increased from {initial_ref_count} to {new_ref_count}"
    )

    # Drop our own reference and force a collection pass; the bytes must
    # still be readable through the view afterwards.
    del message
    gc.collect()

    assert held_view.tobytes() == payload

test/test_py_custom_message_builder.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,6 @@ def __call__(self, minimum_size: int) -> bytearray:
4141
struct_builder.init("dataField", 5)
4242
assert struct_builder._get("dataField") == b"\x00\x00\x00\x00\x00"
4343

44-
struct_builder._get("dataField")[1] = 0xFF
45-
assert struct_builder._get("dataField") == b"\x00\xff\x00\x00\x00"
46-
4744
struct_builder.dataField = b"hello"
4845
assert struct_builder._get("dataField") == b"hello"
4946

0 commit comments

Comments
 (0)