Skip to content

Commit 8b5ebfb

Browse files
committed
Add conversion provider delegation for container element types
This change lets conversion providers (such as StdStringUnicodeConverter) work inside containers (such as std::vector) by adding a delegation mechanism. Changes: (1) add a supports_delegation() method to TypeConverterBase (default: False); (2) enable delegation for StdStringUnicodeConverter and StdStringUnicodeOutputConverter; (3) modify StdVectorConverter to delegate to the element converter when that converter's supports_delegation() returns True; (4) add a test for vectors of UTF-8 strings. This fixes the limitation where the UTF-8 string converters were not picked up when used inside containers: vectors of libcpp_utf8_string or libcpp_utf8_output_string now properly encode/decode UTF-8 strings.
1 parent 6236c3d commit 8b5ebfb

File tree

4 files changed

+213
-7
lines changed

4 files changed

+213
-7
lines changed

autowrap/ConversionProvider.py

Lines changed: 132 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,17 @@ def output_conversion(
134134
) -> Optional[Union[Code, str]]:
135135
raise NotImplementedError()
136136

137+
def supports_delegation(self) -> bool:
    """
    Tell container converters whether element conversion is delegated here.

    A container converter (e.g. the one for std::vector) consults this
    flag for its element type. When it is False -- the default -- the
    container leaves the elements to Cython's automatic conversion of
    standard types. When it is True, the container converter invokes
    this converter for each element instead.
    """
    return False
147+
137148
@staticmethod
138149
def _code_for_instantiate_object_from_iter(cpp_type: CppType, it: str) -> str:
139150
"""
@@ -1668,6 +1679,16 @@ def _perform_recursion(
16681679
else:
16691680
bottommost_code.content.extend(bottommost_code_callback.content)
16701681

1682+
def _has_delegating_converter(self, element_type: CppType) -> bool:
    """
    Report whether the converter registered for ``element_type`` opts in
    to delegation.

    Returns False when this object has no ``cr`` attribute, or when the
    lookup / query raises ``NameError`` or ``KeyError``; otherwise returns
    whatever the element converter's ``supports_delegation()`` returns.
    """
    if hasattr(self, "cr"):
        try:
            return self.cr.get(element_type).supports_delegation()
        except (NameError, KeyError):
            # A failed lookup is treated the same as "no delegation".
            pass
    return False
1691+
16711692
def input_conversion(
16721693
self,
16731694
cpp_type: CppType,
@@ -1886,8 +1907,81 @@ def input_conversion(
18861907

18871908
return code, "deref(%s)" % temp_var, cleanup_code
18881909

1910+
elif self._has_delegating_converter(tt):
1911+
# Case 5: Element type has a converter that supports delegation
1912+
# Use explicit loop with element converter instead of letting Cython handle it
1913+
item = "item%s" % arg_num
1914+
conv_item = "conv_item%s" % arg_num
1915+
element_converter = self.cr.get(tt)
1916+
1917+
# Get element input conversion
1918+
elem_code, elem_call_as, elem_cleanup = element_converter.input_conversion(
1919+
tt, item, arg_num
1920+
)
1921+
1922+
code = Code().add(
1923+
"""
1924+
|cdef libcpp_vector[$inner] * $temp_var = new libcpp_vector[$inner]()
1925+
""",
1926+
locals(),
1927+
)
1928+
1929+
# Add element conversion code (may include variable declarations)
1930+
if hasattr(elem_code, "content") and elem_code.content:
1931+
# Extract any if-block from elem_code and wrap in loop
1932+
code.add(
1933+
"""
1934+
|for $item in $argument_var:
1935+
""",
1936+
locals(),
1937+
)
1938+
code.add(elem_code)
1939+
code.add(
1940+
"""
1941+
| $temp_var.push_back(<$inner>$item)
1942+
""",
1943+
locals(),
1944+
)
1945+
else:
1946+
code.add(
1947+
"""
1948+
|for $item in $argument_var:
1949+
| $temp_var.push_back($elem_call_as)
1950+
""",
1951+
locals(),
1952+
)
1953+
1954+
cleanup_code = Code().add("")
1955+
if cpp_type.topmost_is_ref and not cpp_type.topmost_is_const:
1956+
it = mangle("it_" + argument_var)
1957+
out_converter = self.cr.get(tt)
1958+
elem_out = out_converter.output_conversion(tt, "deref(%s)" % it, conv_item)
1959+
if elem_out is None:
1960+
elem_out_code = "%s = deref(%s)" % (conv_item, it)
1961+
elif hasattr(elem_out, "render"):
1962+
elem_out_code = elem_out.render()
1963+
else:
1964+
elem_out_code = str(elem_out)
1965+
cleanup_code = Code().add(
1966+
"""
1967+
|replace = []
1968+
|cdef libcpp_vector[$inner].iterator $it = $temp_var.begin()
1969+
|while $it != $temp_var.end():
1970+
| $elem_out_code
1971+
| replace.append($conv_item)
1972+
| inc($it)
1973+
|$argument_var[:] = replace
1974+
|del $temp_var
1975+
""",
1976+
locals(),
1977+
)
1978+
else:
1979+
cleanup_code = Code().add("del %s" % temp_var)
1980+
1981+
return code, "deref(%s)" % temp_var, cleanup_code
1982+
18891983
else:
1890-
# Case 5: We wrap a regular type
1984+
# Case 6: We wrap a regular type
18911985
inner = self.converters.cython_type(tt)
18921986
# cython cares for conversion of stl containers with std types:
18931987
code = Code().add(
@@ -1982,6 +2076,35 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var
19822076
)
19832077
return code
19842078

2079+
elif self._has_delegating_converter(tt):
2080+
# Element type has a converter that supports delegation
2081+
# Use explicit loop with element converter
2082+
it = mangle("it_" + input_cpp_var)
2083+
item = mangle("item_" + output_py_var)
2084+
element_converter = self.cr.get(tt)
2085+
2086+
# Get element output conversion
2087+
elem_out = element_converter.output_conversion(tt, "deref(%s)" % it, item)
2088+
if elem_out is None:
2089+
elem_out_code = "%s = deref(%s)" % (item, it)
2090+
elif hasattr(elem_out, "render"):
2091+
elem_out_code = elem_out.render()
2092+
else:
2093+
elem_out_code = str(elem_out)
2094+
2095+
code = Code().add(
2096+
"""
2097+
|$output_py_var = []
2098+
|cdef libcpp_vector[$inner].iterator $it = $input_cpp_var.begin()
2099+
|while $it != $input_cpp_var.end():
2100+
| $elem_out_code
2101+
| $output_py_var.append($item)
2102+
| inc($it)
2103+
""",
2104+
locals(),
2105+
)
2106+
return code
2107+
19852108
else:
19862109
# cython cares for conversion of stl containers with std types:
19872110
code = Code().add(
@@ -2406,16 +2529,17 @@ def output_conversion(
24062529
class StdStringUnicodeConverter(StdStringConverter):
24072530
"""
24082531
This converter deals with functions that expect a C++ std::string.
2409-
Note that this provider will NOT be picked up if it is located inside
2410-
a container (e.g. std::vector aka libcpp_vector). Please use the usual
2411-
StdStringConverter to at least get the typing right.
2532+
It can be used inside containers when delegation is enabled.
24122533
It can only be used in function parameters (i.e. input).
24132534
It can handle both bytes and unicode strings and converts to bytes internally.
24142535
"""
24152536

24162537
def get_base_types(self) -> List[str]:
24172538
return ["libcpp_utf8_string"]
24182539

2540+
def supports_delegation(self) -> bool:
    """Opt in to delegation so container converters invoke this converter per element."""
    return True
2542+
24192543
def matching_python_type(self, cpp_type: CppType) -> str:
24202544
return "" # TODO can we use "basestring"?
24212545

@@ -2446,9 +2570,7 @@ def type_check_expression(self, cpp_type: CppType, argument_var: str) -> str:
24462570
class StdStringUnicodeOutputConverter(StdStringUnicodeConverter):
24472571
"""
24482572
This converter deals with functions that return a C++ std::string.
2449-
Note that this provider will NOT be picked up if it is located inside
2450-
a container (e.g. std::vector aka libcpp_vector). Please use the usual
2451-
StdStringConverter to at least get the typing right.
2573+
It can be used inside containers when delegation is enabled.
24522574
It should only be used in function returns (i.e. output).
24532575
It returns unicode strings to python and therefore expects the C++
24542576
function to return something that is decodable from utf8 (including ascii)
@@ -2457,6 +2579,9 @@ class StdStringUnicodeOutputConverter(StdStringUnicodeConverter):
24572579
def get_base_types(self) -> List[str]:
24582580
return ["libcpp_utf8_output_string"]
24592581

2582+
def supports_delegation(self) -> bool:
    """Opt in to delegation so container converters invoke this converter per element."""
    return True
2584+
24602585
def matching_python_type_full(self, cpp_type: CppType) -> str:
24612586
return "str" # python3
24622587

tests/test_code_generator.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,57 @@ def test_automatic_output_string_conversion():
398398
assert msg == expected
399399

400400

401+
def test_utf8_string_vector_conversion():
    """Test that UTF-8 strings in vectors are properly converted via delegation.

    Generates a wrapper from ``libcpp_utf8_string_vector_test.pxd``, compiles
    and imports it, then checks the output conversion (vector -> list of
    ``str``), the input conversion (list of ``str`` or ``bytes`` -> vector)
    and an input-only call.
    """
    target = os.path.join(test_files, "generated", "libcpp_utf8_string_vector_test.pyx")
    include_dirs = autowrap.parse_and_generate_code(
        ["libcpp_utf8_string_vector_test.pxd"],
        root=test_files,
        target=target,
        debug=True,
    )

    wrapped = autowrap.Utils.compile_and_import(
        "libcpp_utf8_string_vector_wrapped",
        [
            target,
        ],
        include_dirs,
    )
    h = wrapped.Utf8VectorTest()

    # Test output conversion - vector of UTF-8 strings should become list of str
    greetings = h.get_greetings()
    assert isinstance(greetings, list)
    assert len(greetings) == 4
    # All elements should be unicode strings (str in Python 3)
    for s in greetings:
        assert isinstance(s, str), f"Expected str, got {type(s)}"
    assert "Hello" in greetings
    assert "World" in greetings
    assert "Привет" in greetings  # Russian
    assert "你好" in greetings  # Chinese

    # Test input conversion - list of str/bytes should be accepted
    input_strings = ["Test", "Тест", "测试"]  # ASCII, Russian, Chinese
    result = h.echo(input_strings)
    assert isinstance(result, list)
    assert len(result) == 3
    for s in result:
        assert isinstance(s, str), f"Expected str, got {type(s)}"
    assert result == input_strings

    # Test with bytes input
    input_bytes = [b"Hello", b"World"]
    result = h.echo(input_bytes)
    assert isinstance(result, list)
    assert len(result) == 2
    # The echoed values come back through the output converter, so they must
    # be decoded str objects with the same content, not merely the right count.
    for s in result:
        assert isinstance(s, str), f"Expected str, got {type(s)}"
    assert result == ["Hello", "World"]

    # Test count function (input only)
    count = h.count_strings(["a", "b", "c"])
    assert count == 3
450+
451+
401452
def test_wrap_ignore_foreign_cimports():
402453
"""
403454
Test that wrap-ignored classes are not included in foreign cimports.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include <string>
2+
#include <vector>
3+
4+
// Small fixture class used to exercise wrapping of std::vector<std::string>
// arguments and return values.
// NOTE(review): the non-ASCII literals below are stored in whatever encoding
// this source file is compiled with; the accompanying test expects UTF-8.
class Utf8VectorTest
{
public:
    Utf8VectorTest() {}

    // Returns four fixed greetings: two ASCII, one Russian, one Chinese.
    std::vector<std::string> get_greetings() const
    {
        return std::vector<std::string>{"Hello", "World", "Привет", "你好"};
    }

    // Returns a copy of the given vector, unchanged.
    std::vector<std::string> echo(const std::vector<std::string>& input) const
    {
        return input;
    }

    // Returns the number of strings in the given vector.
    size_t count_strings(const std::vector<std::string>& input) const
    {
        return input.size();
    }
};
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# cython: language_level=3
2+
from libcpp.string cimport string as libcpp_utf8_output_string
3+
from libcpp.string cimport string as libcpp_utf8_string
4+
from libcpp.vector cimport vector as libcpp_vector
5+
6+
# Declarations for the C++ fixture class. Vector elements are declared with
# the libcpp_utf8_string / libcpp_utf8_output_string aliases of std::string
# cimported at the top of this file.
cdef extern from "libcpp_utf8_string_vector_test.hpp":

    cdef cppclass Utf8VectorTest:

        Utf8VectorTest()

        libcpp_vector[libcpp_utf8_output_string] get_greetings()

        libcpp_vector[libcpp_utf8_output_string] echo(libcpp_vector[libcpp_utf8_string])

        size_t count_strings(libcpp_vector[libcpp_utf8_string])

0 commit comments

Comments
 (0)