From 12331f20e4b9b1835fe2dd540594d736e3bb0dca Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Fri, 24 Oct 2025 15:58:02 +0200 Subject: [PATCH 1/2] [python] Extend dictionary for numpy array interface types Include all standard C++ fundamental types and the corresponding ROOT C++ types. Connect each of them with the correct Numpy array interface basic type as described at https://numpy.org/doc/stable/reference/arrays.interface.html#object.__array_interface__ . --- .../python/ROOT/_pythonization/_rvec.py | 69 +++++++++++++++++-- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rvec.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rvec.py index d92b0f2c3ab57..7e5f8c42e1ad6 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rvec.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rvec.py @@ -62,16 +62,73 @@ from . import pythonization +# This map includes all relevant C++ fundamental types found at +# https://en.cppreference.com/w/cpp/language/types.html and the associated +# ROOT portable types when available. 
_array_interface_dtype_map = { - "Long64_t": "i", - "ULong64_t": "u", - "double": "f", - "float": "f", + # Integral types + # C++ standard integer types + "short": "i", + "short int": "i", + "signed short": "i", + "signed short int": "i", + "unsigned short": "u", + "unsigned short int": "u", "int": "i", - "long": "i", - "unsigned char": "b", + "signed": "i", + "signed int": "i", + "unsigned": "u", "unsigned int": "u", + "long": "i", + "long int": "i", + "signed long": "i", + "signed long int": "i", "unsigned long": "u", + "unsigned long int": "u", + "long long": "i", + "long long int": "i", + "signed long long": "i", + "signed long long int": "i", + "unsigned long long": "u", + "unsigned long long int": "u", + "std::size_t": "u", # std::size_t is an unsigned integer type + # Extended standard integer types + "std::int8_t": "i", + "std::int16_t": "i", + "std::int32_t": "i", + "std::int64_t": "i", + "std::uint8_t": "u", + "std::uint16_t": "u", + "std::uint32_t": "u", + "std::uint64_t": "u", + # ROOT integer types + "Int_t": "i", + "UInt_t": "u", + "Short_t": "i", + "UShort_t": "u", + "Long_t": "i", + "ULong_t": "u", + "Long64_t": "i", + "ULong64_t": "u", + # Boolean type + "bool": "b", + "Bool_t": "b", + # Character types + "char": "i", + "Char_t": "i", + "signed char": "i", + "unsigned char": "u", + "UChar_t": "u", + "char16_t": "u", # same signedness as std::uint_least16_t + "char32_t": "u", # same signedness as std::uint_least32_t + # Floating-point types + # C++ standard floating-point types + "float": "f", + "double": "f", + "long double": "f", + # ROOT floating-point types + "Float_t": "f", + "Double_t": "f", } From b7780163230e099efdf3258de965a742d4ddfe87 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Fri, 24 Oct 2025 16:05:47 +0200 Subject: [PATCH 2/2] [python] Extend RDataFrame to Numpy array conversion of boolean types * Use ROOT::RVec as the single data structure to recover all types of values via the Take operation. This helps avoiding issues with vectors of boolean values, whether they are visible as the 'bool' or 'Bool_t' C++ types. 
* Add a test that checks both 'bool' and 'Bool_t' types. For the latter, it is required to write a TTree branch. --- .../python/ROOT/_pythonization/_rdataframe.py | 10 ++++------ .../pythonizations/test/rdataframe_asnumpy.py | 20 +++++++++++++++---- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdataframe.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdataframe.py index f9bb2771a73a8..c784c9ca1957c 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdataframe.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdataframe.py @@ -296,10 +296,6 @@ def RDataFrameAsNumpy( result_ptrs = {} for column in columns: column_type = df.GetColumnType(column) - # bool columns should be taken as unsigned chars, because NumPy stores - # bools in bytes - different from the std::vector<bool> returned by the - # action, which might do some space optimization - column_type = "unsigned char" if column_type == "bool" else column_type # If the column type is a class, make sure cling knows about it tclass = ROOT.TClass.GetClass(column_type) @@ -307,8 +303,10 @@ def RDataFrameAsNumpy( raise RuntimeError( f'The column named "{column}" is of type "{column_type}", which is not known to the ROOT interpreter. Please load the corresponding header files or dictionaries.' 
) - - result_ptrs[column] = df.Take[column_type](column) + # We take the values via ROOT::RVec to avoid having to deal with std::vector<bool> + # This uses one single data structure for all array types, which exposes the array interface + # allowing zero-copy conversion to numpy array + result_ptrs[column] = df.Take[f"{column_type}, ROOT::RVec<{column_type}>"](column) result = AsNumpyResult(result_ptrs, columns) diff --git a/bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py b/bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py index dd9dd87f64507..0bc84e84b5b41 100644 --- a/bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py +++ b/bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py @@ -7,7 +7,7 @@ import numpy as np import ROOT from ROOT._pythonization._rdataframe import _clone_asnumpyresult - +import os def make_tree(*dtypes): """ @@ -90,10 +90,22 @@ def test_branch_bool(self): Test bool data-type as a special case since we cannot adopt the std::vector<bool> with numpy arrays """ - df = ROOT.RDataFrame(2).Define("x", "bool(rdfentry_)") + treename = "test_branch_bool" + filename = "test_branch_bool.root" + # Snapshot a TTree so that column 'x' will be of type 'Bool_t' + ROOT.RDataFrame(2).Define("x", "bool(rdfentry_)").Snapshot(treename, filename) + # The column 'y' will instead have type 'bool' + df = ROOT.RDataFrame(treename, filename).Define("y", "bool(rdfentry_)") + self.assertEqual(df.GetColumnType("x"), "Bool_t") + self.assertEqual(df.GetColumnType("y"), "bool") npy = df.AsNumpy() - self.assertFalse(bool(npy["x"][0])) - self.assertTrue(bool(npy["x"][1])) + # Both numpy arrays should have dtype bool + self.assertEqual(npy["x"].dtype, bool) + self.assertEqual(npy["y"].dtype, bool) + self.assertFalse(npy["x"][0]) + self.assertTrue(npy["x"][1]) + self.assertFalse(npy["y"][0]) + self.assertTrue(npy["y"][1]) def test_read_array(self): """