OpenMS
diff --git a/‎autowrap/ConversionProvider.py‎
Lines changed: 228 additions & 0 deletions b/‎autowrap/ConversionProvider.py‎
Lines changed: 228 additions & 0 deletions
diff --git a/‎docs/libcpp_vector_as_np.md‎
Lines changed: 143 additions & 0 deletions b/‎docs/libcpp_vector_as_np.md‎
Lines changed: 143 additions & 0 deletions
@@ -1970,6 +1970,234 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var
             return code
 
 
+class StdVectorAsNumpyConverter(TypeConverterBase):
+    """
+    Converter for std::vector<T> as numpy arrays instead of Python lists.
+
+    This converter wraps libcpp vectors of numpy-compatible numeric types
+    as numpy arrays in function signatures. It:
+    - Supports input and output vectors
+    - Supports one level of nesting (vector<vector<T>> <-> 2D array);
+      deeper nesting is not matched by this converter
+    - Copies the data element by element in both directions; no memory is
+      shared between the C++ vector and the numpy array
+    - Returns arrays created with numpy.empty, so output data is owned by
+      Python
+
+    To use this converter, register it in special_converters:
+        from autowrap.ConversionProvider import StdVectorAsNumpyConverter, special_converters
+        special_converters.append(StdVectorAsNumpyConverter())
+
+    Example PXD declaration:
+        libcpp_vector[double] getData()
+        void processData(libcpp_vector[double] data)
+        libcpp_vector[libcpp_vector[double]] getData2D()
+
+    Example Python usage:
+        import numpy as np
+        data = obj.getData()           # Returns numpy array
+        obj.processData(np.array([1.0, 2.0, 3.0]))  # Pass numpy array
+    """
+
+    # Mapping of C++ element type names to numpy dtype attribute names
+    # (the generated code uses them as ``numpy.<name>``).
+    # NOTE(review): "long" and "size_t" are mapped to fixed 64-bit dtypes;
+    # confirm this is acceptable on platforms where those C types are 32-bit.
+    # NOTE(review): lookups use ``cpp_type.base_type`` only; confirm that the
+    # parser ever yields "unsigned int" as a base_type, otherwise that entry
+    # is unreachable.
+    NUMPY_DTYPE_MAP = {
+        "float": "float32",
+        "double": "float64",
+        "int": "int32",
+        "int32_t": "int32",
+        "int64_t": "int64",
+        "uint32_t": "uint32",
+        "uint64_t": "uint64",
+        "size_t": "uint64",
+        "long": "int64",
+        "unsigned int": "uint32",
+        "bool": "bool_",
+    }
+
+    def get_base_types(self) -> List[str]:
+        """Return the base type names this converter is registered for."""
+        return ["libcpp_vector"]
+
+    def matches(self, cpp_type: CppType) -> bool:
+        """
+        Match vector<T> and vector<vector<T>> for T in NUMPY_DTYPE_MAP.
+
+        Only these two shapes are accepted because input_conversion and
+        output_conversion generate code for 1D and 2D arrays only. Anything
+        else (deeper nesting, unsupported element types, an unexpected
+        number of template arguments) returns False.
+        """
+        args = cpp_type.template_args
+        if not args or len(args) != 1:
+            return False
+        (tt,) = args
+
+        # 1D: element type must have a numpy dtype
+        if tt.base_type in self.NUMPY_DTYPE_MAP:
+            return True
+
+        # 2D: exactly one more vector level whose element type has a dtype
+        if tt.base_type == "libcpp_vector":
+            inner_args = tt.template_args
+            return (
+                bool(inner_args)
+                and len(inner_args) == 1
+                and inner_args[0].base_type in self.NUMPY_DTYPE_MAP
+            )
+
+        return False
+
+    def _get_numpy_dtype(self, cpp_type: CppType) -> str:
+        """Get numpy dtype name for a C++ element type ("float64" if unknown)."""
+        return self.NUMPY_DTYPE_MAP.get(cpp_type.base_type, "float64")
+
+    def _is_nested_vector(self, cpp_type: CppType) -> bool:
+        """Check if the single template argument of cpp_type is itself a vector."""
+        if not cpp_type.template_args:
+            return False
+        (tt,) = cpp_type.template_args
+        return tt.base_type == "libcpp_vector"
+
+    def matching_python_type(self, cpp_type: CppType) -> str:
+        # Return 'object' to avoid Cython type declaration issues
+        # The actual type will be numpy.ndarray at runtime
+        return "object"
+
+    def matching_python_type_full(self, cpp_type: CppType) -> str:
+        """Return the Python type name used for full type annotations."""
+        return "numpy.ndarray"
+
+    def type_check_expression(self, cpp_type: CppType, argument_var: str) -> str:
+        """
+        Build the Python expression that checks an incoming argument.
+
+        For nested vectors the argument must be a numpy array, or a sized
+        object that is either empty or whose first element is sized too.
+        For simple vectors any numpy array or sized object is accepted.
+        The element dtype is not checked here; it is enforced later by
+        numpy.asarray in the generated input conversion.
+        """
+        if self._is_nested_vector(cpp_type):
+            # An empty outer sequence is a valid (empty) 2D input, so only
+            # look at the first element when there is one.
+            return (
+                "(isinstance(%s, numpy.ndarray) or "
+                "(hasattr(%s, '__len__') and (len(%s) == 0 or "
+                "hasattr(%s[0], '__len__'))))" % ((argument_var,) * 4)
+            )
+        # For simple vectors, accept numpy arrays or array-like objects
+        return (
+            "(isinstance(%s, numpy.ndarray) or hasattr(%s, '__len__'))"
+            % (argument_var, argument_var)
+        )
+
+    def input_conversion(
+        self, cpp_type: CppType, argument_var: str, arg_num: int
+    ) -> Tuple[Code, str, Union[Code, str]]:
+        """
+        Generate code that copies a numpy array into a new C++ vector.
+
+        Returns a tuple of (conversion code, expression to pass to the C++
+        call, cleanup code). The vector is heap-allocated as ``v<arg_num>``
+        and deleted by the cleanup code; nothing is copied back into the
+        Python argument.
+        """
+        (tt,) = cpp_type.template_args
+        temp_var = "v%d" % arg_num
+        arr_var = argument_var + "_arr"
+
+        if self._is_nested_vector(cpp_type):
+            # Handle nested vectors (2D arrays)
+            (inner_tt,) = tt.template_args
+            inner_type = self.converters.cython_type(inner_tt)
+            outer_inner_type = self.converters.cython_type(tt)
+            dtype = self._get_numpy_dtype(inner_tt)
+
+            code = Code().add(
+                """
+                |# Convert 2D numpy array to nested C++ vector
+                |cdef object $arr_var = numpy.asarray($argument_var, dtype=numpy.$dtype)
+                |cdef libcpp_vector[$outer_inner_type] * $temp_var = new libcpp_vector[$outer_inner_type]()
+                |cdef size_t i_$arg_num, j_$arg_num
+                |cdef libcpp_vector[$inner_type] row_$arg_num
+                |for i_$arg_num in range($arr_var.shape[0]):
+                |    row_$arg_num = libcpp_vector[$inner_type]()
+                |    for j_$arg_num in range($arr_var.shape[1]):
+                |        row_$arg_num.push_back(<$inner_type>$arr_var[i_$arg_num, j_$arg_num])
+                |    $temp_var.push_back(row_$arg_num)
+                """,
+                dict(
+                    argument_var=argument_var,
+                    arr_var=arr_var,
+                    temp_var=temp_var,
+                    inner_type=inner_type,
+                    outer_inner_type=outer_inner_type,
+                    dtype=dtype,
+                    arg_num=arg_num,
+                ),
+            )
+        else:
+            # Handle simple vectors (1D arrays)
+            inner_type = self.converters.cython_type(tt)
+            dtype = self._get_numpy_dtype(tt)
+
+            code = Code().add(
+                """
+                |# Convert 1D numpy array to C++ vector
+                |cdef object $arr_var = numpy.asarray($argument_var, dtype=numpy.$dtype)
+                |cdef libcpp_vector[$inner_type] * $temp_var = new libcpp_vector[$inner_type]()
+                |cdef size_t i_$arg_num
+                |$temp_var.reserve($arr_var.shape[0])
+                |for i_$arg_num in range($arr_var.shape[0]):
+                |    $temp_var.push_back(<$inner_type>$arr_var[i_$arg_num])
+                """,
+                dict(
+                    argument_var=argument_var,
+                    arr_var=arr_var,
+                    temp_var=temp_var,
+                    inner_type=inner_type,
+                    dtype=dtype,
+                    arg_num=arg_num,
+                ),
+            )
+
+        # The temporary vector was created with ``new``; free it after the call.
+        cleanup = "del %s" % temp_var
+        return code, "deref(%s)" % temp_var, cleanup
+
+    def call_method(self, res_type: CppType, cy_call_str: str, with_const: bool = True) -> str:
+        """Return the statement that stores the C++ call result in ``_r``."""
+        return "_r = %s" % cy_call_str
+
+    def output_conversion(
+        self, cpp_type: CppType, input_cpp_var: str, output_py_var: str
+    ) -> Optional[Code]:
+        """
+        Generate code that copies a C++ vector into a new numpy array.
+
+        The array is allocated with numpy.empty and filled element by
+        element, so the result is independent of the C++ vector. All helper
+        variables in the generated code are suffixed with output_py_var so
+        they cannot clash with other conversions in the same function.
+        For nested vectors the generated code raises ValueError when the
+        rows do not all have the length of the first row.
+        """
+        (tt,) = cpp_type.template_args
+
+        if self._is_nested_vector(cpp_type):
+            # Handle nested vectors (2D arrays)
+            (inner_tt,) = tt.template_args
+            inner_type = self.converters.cython_type(inner_tt)
+            dtype = self._get_numpy_dtype(inner_tt)
+
+            # The per-row size check prevents reading past the end of a row
+            # that is shorter than the first one.
+            code = Code().add(
+                """
+                |# Convert nested C++ vector to 2D numpy array
+                |cdef size_t n_rows_$output_py_var = $input_cpp_var.size()
+                |cdef size_t n_cols_$output_py_var = $input_cpp_var[0].size() if n_rows_$output_py_var > 0 else 0
+                |cdef object $output_py_var = numpy.empty((n_rows_$output_py_var, n_cols_$output_py_var), dtype=numpy.$dtype)
+                |cdef size_t i_$output_py_var, j_$output_py_var
+                |for i_$output_py_var in range(n_rows_$output_py_var):
+                |    if $input_cpp_var[i_$output_py_var].size() != n_cols_$output_py_var:
+                |        raise ValueError("cannot convert ragged nested vector to a 2D numpy array")
+                |    for j_$output_py_var in range(n_cols_$output_py_var):
+                |        $output_py_var[i_$output_py_var, j_$output_py_var] = <$inner_type>$input_cpp_var[i_$output_py_var][j_$output_py_var]
+                """,
+                dict(
+                    input_cpp_var=input_cpp_var,
+                    output_py_var=output_py_var,
+                    inner_type=inner_type,
+                    dtype=dtype,
+                ),
+            )
+            return code
+
+        # Handle simple vectors (1D arrays)
+        inner_type = self.converters.cython_type(tt)
+        dtype = self._get_numpy_dtype(tt)
+
+        # For output, we create a new numpy array and copy data
+        # The memory is owned by Python/numpy
+        code = Code().add(
+            """
+            |# Convert C++ vector to 1D numpy array
+            |cdef size_t n_$output_py_var = $input_cpp_var.size()
+            |cdef object $output_py_var = numpy.empty(n_$output_py_var, dtype=numpy.$dtype)
+            |cdef size_t i_$output_py_var
+            |for i_$output_py_var in range(n_$output_py_var):
+            |    $output_py_var[i_$output_py_var] = <$inner_type>$input_cpp_var[i_$output_py_var]
+            """,
+            dict(
+                input_cpp_var=input_cpp_var,
+                output_py_var=output_py_var,
+                inner_type=inner_type,
+                dtype=dtype,
+            ),
+        )
+        return code
+
+
+
 class StdStringConverter(TypeConverterBase):
     """
     This converter deals with functions that expect/return a C++ std::string.
 
@@ -0,0 +1,143 @@
+# Using libcpp_vector_as_np Conversion Provider
+
+This document explains how to use the `StdVectorAsNumpyConverter` to wrap C++ `std::vector` types as NumPy arrays instead of Python lists.
+
+## Overview
+
+The `StdVectorAsNumpyConverter` provides automatic conversion between:
+- C++ `std::vector<T>` ↔ NumPy `ndarray`
+- C++ `std::vector<std::vector<T>>` ↔ NumPy 2D `ndarray`
+
+where T is a numeric type compatible with NumPy (int, float, double, etc.).
+
+## Features
+
+- **Copy-based conversion**: Data is copied element by element in both directions (see "Performance Considerations"); no memory is shared between the C++ vector and the NumPy array
+- **Input and output support**: Works with both function parameters and return values
+- **Nested vectors**: Supports 2D arrays via nested vectors
+- **Data ownership**: Python owns the output data (no memory leaks)
+- **Type safety**: Automatic type checking and conversion
+
+## Usage
+
+### 1. Register the Converter
+
+```python
+from autowrap.ConversionProvider import StdVectorAsNumpyConverter, special_converters
+
+# Register the converter before calling parse_and_generate_code
+special_converters.append(StdVectorAsNumpyConverter())
+```
+
+### 2. Write Your PXD File
+
+```cython
+from libcpp.vector cimport vector as libcpp_vector
+
+cdef extern from "mylib.hpp":
+    cdef cppclass MyClass:
+        # Simple vector input
+        double sumVector(libcpp_vector[double] data)
+        
+        # Simple vector output
+        libcpp_vector[double] createVector(size_t size)
+        
+        # Vector by reference (passed as a temporary copy; C++-side changes are not written back)
+        void processVector(libcpp_vector[double]& data)
+        
+        # Nested vectors (2D arrays)
+        libcpp_vector[libcpp_vector[double]] getData2D()
+        double sum2D(libcpp_vector[libcpp_vector[double]] data)
+```
+
+### 3. Generate and Compile
+
+```python
+import autowrap
+import numpy
+
+# Generate wrapper code with numpy support
+decls, instance_map = autowrap.parse(
+    ["mylib.pxd"],
+    root="path/to/pxd"
+)
+
+include_dirs = autowrap.generate_code(
+    decls,
+    instance_map,
+    target="mylib_wrapper.pyx",
+    include_numpy=True  # Important!
+)
+
+# Add numpy include directories for compilation
+include_dirs.append(numpy.get_include())
+
+# Compile
+module = autowrap.Utils.compile_and_import(
+    "mylib_wrapper",
+    ["mylib_wrapper.pyx"],
+    include_dirs
+)
+```
+
+### 4. Use in Python
+
+```python
+import numpy as np
+
+obj = module.MyClass()
+
+# Pass NumPy arrays as function arguments
+data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+result = obj.sumVector(data)
+print(f"Sum: {result}")
+
+# Receive NumPy arrays from C++
+vec = obj.createVector(10)
+print(f"Type: {type(vec)}")  # <class 'numpy.ndarray'>
+print(f"Shape: {vec.shape}")  # (10,)
+
+# Work with 2D arrays
+data_2d = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
+result = obj.sum2D(data_2d)
+print(f"2D Sum: {result}")
+
+# Python lists also work (automatically converted)
+result = obj.sumVector([1.0, 2.0, 3.0])
+```
+
+## Supported Types
+
+The converter supports the following C++ numeric types:
+- `float` → `numpy.float32`
+- `double` → `numpy.float64`
+- `int` → `numpy.int32`
+- `int32_t` → `numpy.int32`
+- `int64_t` → `numpy.int64`
+- `uint32_t` → `numpy.uint32`
+- `uint64_t` → `numpy.uint64`
+- `size_t` → `numpy.uint64`
+- `long` → `numpy.int64`
+- `unsigned int` → `numpy.uint32`
+- `bool` → `numpy.bool_`
+
+## Limitations
+
+- Only works with numeric types (no custom classes)
+- Nested vectors are limited to 2D (no 3D+ arrays currently)
+- Reference parameters (1D and nested) receive a temporary copy of the input; modifications made on the C++ side are not written back to the NumPy array
+
+## Performance Considerations
+
+- Input conversion: Creates a temporary C++ vector (one copy)
+- Output conversion: Creates a new NumPy array (one copy)
+- Data is owned by Python after conversion (safe but not zero-copy)
+- For very large datasets, consider using the buffer protocol directly (see existing buffer examples in autowrap)
+
+## Example: Complete Workflow
+
+See `tests/test_numpy_vector_converter.py` for a complete working example with:
+- Simple vector input/output
+- Different numeric types
+- Nested vectors (2D arrays)
+- Data ownership tests