Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -296,19 +296,17 @@ def RDataFrameAsNumpy(
result_ptrs = {}
for column in columns:
column_type = df.GetColumnType(column)
# bool columns should be taken as unsigned chars, because NumPy stores
# bools in bytes - different from the std::vector<bool> returned by the
# action, which might do some space optimization
column_type = "unsigned char" if column_type == "bool" else column_type

# If the column type is a class, make sure cling knows about it
tclass = ROOT.TClass.GetClass(column_type)
if tclass and not tclass.GetClassInfo():
raise RuntimeError(
f'The column named "{column}" is of type "{column_type}", which is not known to the ROOT interpreter. Please load the corresponding header files or dictionaries.'
)

result_ptrs[column] = df.Take[column_type](column)
# We take the values via ROOT::RVec to avoid having to deal with std::vector<bool>
# This uses one single data structure for all array types, which exposes the array interface
# allowing zero-copy conversion to numpy array
result_ptrs[column] = df.Take[f"{column_type}, ROOT::RVec<{column_type}>"](column)

result = AsNumpyResult(result_ptrs, columns)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,73 @@

from . import pythonization

# This map includes all relevant C++ fundamental types found at
# https://en.cppreference.com/w/cpp/language/types.html and the associated
# ROOT portable types when available.
# Maps a C++ element type name to the one-character type kind used in a NumPy
# array-interface "typestr":
#   "i" -> signed integer, "u" -> unsigned integer,
#   "f" -> floating point, "b" -> boolean.
# Each key appears exactly once: duplicate keys in a dict literal are silently
# resolved as "last one wins", which hides conflicting entries.
_array_interface_dtype_map = {
    # Integral types
    # C++ standard integer types
    "short": "i",
    "short int": "i",
    "signed short": "i",
    "signed short int": "i",
    "unsigned short": "u",
    "unsigned short int": "u",
    "int": "i",
    "signed": "i",
    "signed int": "i",
    "unsigned": "u",
    "unsigned int": "u",
    "long": "i",
    "long int": "i",
    "signed long": "i",
    "signed long int": "i",
    "unsigned long": "u",
    "unsigned long int": "u",
    "long long": "i",
    "long long int": "i",
    "signed long long": "i",
    "signed long long int": "i",
    "unsigned long long": "u",
    "unsigned long long int": "u",
    # std::size_t is an unsigned integer type
    "std::size_t": "u",
    # Extended standard integer types
    "std::int8_t": "i",
    "std::int16_t": "i",
    "std::int32_t": "i",
    "std::int64_t": "i",
    "std::uint8_t": "u",
    "std::uint16_t": "u",
    "std::uint32_t": "u",
    "std::uint64_t": "u",
    # ROOT integer types
    "Int_t": "i",
    "UInt_t": "u",
    "Short_t": "i",
    "UShort_t": "u",
    "Long_t": "i",
    "ULong_t": "u",
    "Long64_t": "i",
    "ULong64_t": "u",
    # Boolean type
    "bool": "b",
    "Bool_t": "b",
    # Character types
    # NOTE(review): the signedness of plain "char" is implementation-defined
    # in C++; it is kept as signed here, matching the previous mapping.
    "char": "i",
    "Char_t": "i",
    "signed char": "i",
    "unsigned char": "u",
    "UChar_t": "u",
    # char16_t / char32_t have the same signedness as std::uint_least16_t /
    # std::uint_least32_t, i.e. they are unsigned
    "char16_t": "u",
    "char32_t": "u",
    # Floating-point types
    # C++ standard floating-point types
    "float": "f",
    "double": "f",
    "long double": "f",
    # ROOT floating-point types
    "Float_t": "f",
    "Double_t": "f",
}


Expand Down
20 changes: 16 additions & 4 deletions bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import pickle
import platform
import tempfile
import unittest
from pathlib import Path

import numpy as np
import ROOT
from ROOT._pythonization._rdataframe import _clone_asnumpyresult

import os

Check failure on line 10 in bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py:10:8: F401 `os` imported but unused

Check failure on line 10 in bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py:1:1: I001 Import block is un-sorted or un-formatted

def make_tree(*dtypes):
"""
Expand Down Expand Up @@ -90,10 +90,22 @@
Test bool data-type as a special case since we cannot adopt
the std::vector<bool> with numpy arrays
"""
df = ROOT.RDataFrame(2).Define("x", "bool(rdfentry_)")
treename = "test_branch_bool"
filename = "test_branch_bool.root"
# Snapshot a TTree so that column 'x' will be of type 'Bool_t'
ROOT.RDataFrame(2).Define("x", "bool(rdfentry_)").Snapshot(treename, filename)
# The column 'y' will instead have type 'bool'
df = ROOT.RDataFrame(treename, filename).Define("y", "bool(rdfentry_)")
self.assertEqual(df.GetColumnType("x"), "Bool_t")
self.assertEqual(df.GetColumnType("y"), "bool")
npy = df.AsNumpy()
self.assertFalse(bool(npy["x"][0]))
self.assertTrue(bool(npy["x"][1]))
# Both numpy arrays should have dtype bool
self.assertEqual(npy["x"].dtype, bool)
self.assertEqual(npy["y"].dtype, bool)
self.assertFalse(npy["x"][0])
self.assertTrue(npy["x"][1])
self.assertFalse(npy["y"][0])
self.assertTrue(npy["y"][1])

def test_read_array(self):
"""
Expand Down
Loading