From 3d2038124d7eb0dc0e2d8b143e37ad2250d2f9f7 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Fri, 29 Nov 2024 11:16:59 +0100 Subject: [PATCH 1/3] [ntuple] value behavior for RNTuple[Global|Cluster]Range Make RNTupleGlobalRange and RNTupleClusterRange copyable and default-constructible. --- tree/ntuple/v7/inc/ROOT/RNTupleView.hxx | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx index bb0b0832ce32b..c2a69fedaa08b 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx @@ -40,8 +40,8 @@ namespace Experimental { // clang-format on class RNTupleGlobalRange { private: - NTupleSize_t fStart; - NTupleSize_t fEnd; + NTupleSize_t fStart = kInvalidNTupleIndex; + NTupleSize_t fEnd = kInvalidNTupleIndex; public: class RIterator { @@ -67,7 +67,9 @@ public: bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; } }; + RNTupleGlobalRange() = default; RNTupleGlobalRange(NTupleSize_t start, NTupleSize_t end) : fStart(start), fEnd(end) {} + RIterator begin() { return RIterator(fStart); } RIterator end() { return RIterator(fEnd); } NTupleSize_t size() { return fEnd - fStart; } @@ -84,9 +86,10 @@ public: // clang-format on class RNTupleClusterRange { private: - const DescriptorId_t fClusterId; - const ClusterSize_t::ValueType fStart; - const ClusterSize_t::ValueType fEnd; + DescriptorId_t fClusterId = kInvalidDescriptorId; + ClusterSize_t::ValueType fStart = kInvalidClusterIndex; + ClusterSize_t::ValueType fEnd = kInvalidClusterIndex; + public: class RIterator { private: @@ -111,8 +114,10 @@ public: bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; } }; + RNTupleClusterRange() = default; RNTupleClusterRange(DescriptorId_t clusterId, ClusterSize_t::ValueType start, ClusterSize_t::ValueType end) : fClusterId(clusterId), fStart(start), fEnd(end) {} + RIterator begin() { return 
RIterator(RClusterIndex(fClusterId, fStart)); } RIterator end() { return RIterator(RClusterIndex(fClusterId, fEnd)); } }; From 373837cbd878bcac81472f59859a837d8cb9e217 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Fri, 29 Nov 2024 12:36:15 +0100 Subject: [PATCH 2/3] [ntuple] fix moving RNTupleCollectionView Workaround. The underlying problem, moving the RField, is still broken. To be addressed at a later point. --- tree/ntuple/v7/inc/ROOT/RNTupleView.hxx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx b/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx index c2a69fedaa08b..4d74954659a24 100644 --- a/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx +++ b/tree/ntuple/v7/inc/ROOT/RNTupleView.hxx @@ -354,14 +354,14 @@ class RNTupleCollectionView { private: Internal::RPageSource *fSource; - RField> fField; + std::unique_ptr>> fField; // TODO(jblomer): fix moving RField RFieldBase::RValue fValue; RNTupleCollectionView(DescriptorId_t fieldId, const std::string &fieldName, Internal::RPageSource *source) - : fSource(source), fField(fieldName), fValue(fField.CreateValue()) + : fSource(source), fField(new RField>(fieldName)), fValue(fField->CreateValue()) { - fField.SetOnDiskId(fieldId); - Internal::CallConnectPageSourceOnField(fField, *source); + fField->SetOnDiskId(fieldId); + Internal::CallConnectPageSourceOnField(*fField, *source); } static RNTupleCollectionView Create(DescriptorId_t fieldId, Internal::RPageSource *source) @@ -382,10 +382,10 @@ private: DescriptorId_t GetFieldId(std::string_view fieldName) { auto descGuard = fSource->GetSharedDescriptorGuard(); - auto fieldId = descGuard->FindFieldId(fieldName, fField.GetOnDiskId()); + auto fieldId = descGuard->FindFieldId(fieldName, fField->GetOnDiskId()); if (fieldId == kInvalidDescriptorId) { throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in collection '" + - descGuard->GetQualifiedFieldName(fField.GetOnDiskId()) + "'")); + 
descGuard->GetQualifiedFieldName(fField->GetOnDiskId()) + "'")); } return fieldId; } @@ -400,7 +400,7 @@ public: RNTupleClusterRange GetCollectionRange(NTupleSize_t globalIndex) { ClusterSize_t size; RClusterIndex collectionStart; - fField.GetCollectionInfo(globalIndex, &collectionStart, &size); + fField->GetCollectionInfo(globalIndex, &collectionStart, &size); return RNTupleClusterRange(collectionStart.GetClusterId(), collectionStart.GetIndex(), collectionStart.GetIndex() + size); } @@ -408,7 +408,7 @@ public: { ClusterSize_t size; RClusterIndex collectionStart; - fField.GetCollectionInfo(clusterIndex, &collectionStart, &size); + fField->GetCollectionInfo(clusterIndex, &collectionStart, &size); return RNTupleClusterRange(collectionStart.GetClusterId(), collectionStart.GetIndex(), collectionStart.GetIndex() + size); } From 570e576480f8427ee0c1d14c3c2a19d3ba78c981 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Thu, 28 Nov 2024 23:48:17 +0100 Subject: [PATCH 3/3] [ntuple] add streaming vector tutorial --- .../v7/ntuple/ntpl015_streaming_vector.C | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 tutorials/v7/ntuple/ntpl015_streaming_vector.C diff --git a/tutorials/v7/ntuple/ntpl015_streaming_vector.C b/tutorials/v7/ntuple/ntpl015_streaming_vector.C new file mode 100644 index 0000000000000..adc3b04970152 --- /dev/null +++ b/tutorials/v7/ntuple/ntpl015_streaming_vector.C @@ -0,0 +1,161 @@ +/// \file +/// \ingroup tutorial_ntuple +/// +/// Example of a streaming vector: a special purpose container that reads large vectors piece-wise. +/// +/// \macro_code +/// +/// \date November 2024 +/// \author Peter van Gemmeren, the ROOT Team + +// NOTE: The RNTuple classes are experimental at this point. +// Functionality and interface are still subject to changes. 
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+/*
+ * NOTE(review): every #include above is missing its target, and template argument lists are
+ * missing further down (`MakeField>`, `GetPtr>`, `std::make_unique()`, the bare `template`).
+ * Presumably everything between angle brackets was stripped from this copy -- restore it from
+ * the upstream tutorial before building.
+ */
+using namespace ROOT::Experimental;
+
+constexpr char const *kFileName = "ntpl015_streaming_vector.root"; // file written, then read back
+constexpr char const *kNTupleName = "ntpl";                        // name passed to Recreate() and Open()
+constexpr char const *kFieldName = "LargeVector";                  // the only field added to the model
+constexpr unsigned int kNEvents = 10;                              // entries filled by CreateRNTuple()
+constexpr unsigned int kVectorSize = 1000000;                      // values appended per entry
+/*
+ * Writes the input file: creates a model with one field named kFieldName, opens a writer on
+ * kFileName through RNTupleWriter::Recreate(), and fills kNEvents entries.
+ */
+void CreateRNTuple()
+{
+   auto model = RNTupleModel::Create();
+   auto ptrLargeVector = model->MakeField>(kFieldName);
+   auto writer = RNTupleWriter::Recreate(std::move(model), kNTupleName, kFileName);
+   // NOTE(review): Integer(-1) below presumably relies on -1 converting to the largest unsigned bound -- confirm.
+   auto prng = std::make_unique();
+   prng->SetSeed();
+   // Per entry: empty the vector behind ptrLargeVector, append kVectorSize values from prng, then Fill().
+   for (NTupleSize_t i = 0; i < kNEvents; i++) {
+      ptrLargeVector->clear();
+      for (std::size_t j = 0; j < kVectorSize; j++)
+         ptrLargeVector->emplace_back(prng->Integer(-1));
+      writer->Fill();
+   }
+   std::cout << "RNTuple written" << std::endl;
+}
+
+/*
+ * ==================================================================================================
+ */
+/*
+ * Baseline read: obtains a pointer to the kFieldName value of the default entry of the reader and,
+ * for every entry, calls LoadEntry(i) and then prints the size of the vector and the sum of its
+ * elements.
+ */
+void ReadRNTupleSimple()
+{
+   auto reader = RNTupleReader::Open(kNTupleName, kFileName);
+
+   const auto nEntries = reader->GetNEntries();
+   std::cout << "Simple reading, found " << nEntries << " entries" << std::endl;
+
+   auto ptrLargeVector = reader->GetModel().GetDefaultEntry().GetPtr>(kFieldName);
+   for (NTupleSize_t i = 0; i < nEntries; i++) {
+      reader->LoadEntry(i);
+
+      const auto vectorSize = ptrLargeVector->size();
+      uint64_t sum = 0;
+      for (auto val : *ptrLargeVector)
+         sum += val;
+
+      std::cout << "Size and sum of vector: " << vectorSize << " " << sum << std::endl;
+   }
+   std::cout << "RNTuple simple read" << std::endl;
+}
+
+/*
+ * ==================================================================================================
+ */
+/*
+ * Vector-like reader built from two views: fVectorView (the collection) and fItemView (obtained
+ * from it with GetView("_0")). LoadEntry() picks the entry; begin() and end() then walk fRange,
+ * and every dereference reads a single element through fItemView.
+ * NOTE(review): `template` has no parameter list in this copy; T is used below, so it was
+ * presumably a single type parameter named T -- confirm against upstream.
+ */
+template
+class RStreamingVector {
+   RNTupleCollectionView fVectorView; // collection view handed to the constructor
+   RNTupleView fItemView;             // view obtained from fVectorView.GetView("_0")
+   RNTupleClusterRange fRange;        // result of GetCollectionRange() for fEntry
+   NTupleSize_t fEntry{0};            // entry passed to the last LoadEntry()
+   NTupleSize_t fSize{0};             // fVectorView(fEntry), reported by size()
+
+public:
+   class iterator { // a position in fRange plus the view used to read the element at that position
+      RNTupleClusterRange::RIterator fIndex;
+      RNTupleView &fView; // bound to fItemView of the enclosing object by begin() and end()
+
+   public:
+      iterator(RNTupleClusterRange::RIterator index, RNTupleView &view) : fIndex(index), fView(view) {}
+      ~iterator() = default;
+      // Increments only advance fIndex; elements are fetched from fView on dereference.
+      iterator operator++(int) /* postfix */
+      {
+         auto r = *this;
+         ++(*this);
+         return r;
+      }
+      iterator &operator++() /* prefix */
+      {
+         ++fIndex;
+         return *this;
+      }
+      const T &operator*() { return fView.operator()(*fIndex); }
+      const T *operator->() { return &fView.operator()(*fIndex); }
+      bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
+      bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
+   };
+   /*
+    * Moves vectorView into fVectorView, then derives fItemView from it. The initializer of
+    * fItemView reads fVectorView, which works because fVectorView is declared first.
+    * NOTE(review): "_0" is presumably the name of the item sub-field of the collection -- confirm.
+    */
+   RStreamingVector(RNTupleCollectionView &&vectorView)
+      : fVectorView(std::move(vectorView)), fItemView(fVectorView.GetView("_0"))
+   {
+   }
+   ~RStreamingVector() = default;
+   // Value cached by the last LoadEntry(); 0 until then (presumably the element count of that entry).
+   NTupleSize_t size() const { return fSize; }
+   // Iterate over fRange; the iterators keep a reference to fItemView and must not outlive this object.
+   iterator begin() { return iterator(fRange.begin(), fItemView); }
+   iterator end() { return iterator(fRange.end(), fItemView); }
+   // Selects `entry`: caches its collection range and the value fVectorView returns for it.
+   void LoadEntry(NTupleSize_t entry)
+   {
+      fEntry = entry;
+      fRange = fVectorView.GetCollectionRange(fEntry);
+      fSize = fVectorView.operator()(fEntry);
+   }
+};
+/*
+ * Same per-entry size and sum report as ReadRNTupleSimple(), but the elements are iterated through
+ * an RStreamingVector built on reader->GetCollectionView(kFieldName) instead of the default entry.
+ */
+void ReadRNTupleStreamingVector()
+{
+   auto reader = RNTupleReader::Open(kNTupleName, kFileName);
+
+   const auto nEntries = reader->GetNEntries();
+   std::cout << "Streamed reading, found " << nEntries << " entries" << std::endl;
+
+   RStreamingVector streamingVector(reader->GetCollectionView(kFieldName));
+   for (NTupleSize_t i = 0; i < nEntries; i++) {
+      streamingVector.LoadEntry(i);
+
+      const auto vectorSize = streamingVector.size();
+      uint64_t sum = 0;
+      for (auto val : streamingVector)
+         sum += val;
+
+      std::cout << "Size and sum of vector: " << vectorSize << " " << sum << std::endl;
+   }
+   std::cout << "RNTuple streaming read" << std::endl;
+}
+// Runs the three steps in order: write the file, read it via the default entry, read it via RStreamingVector.
+void ntpl015_streaming_vector()
+{
+   CreateRNTuple();
+   ReadRNTupleSimple();
+   ReadRNTupleStreamingVector();
+}