Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions tree/ntuple/v7/inc/ROOT/RNTupleView.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ namespace Experimental {
// clang-format on
class RNTupleGlobalRange {
private:
NTupleSize_t fStart;
NTupleSize_t fEnd;
NTupleSize_t fStart = kInvalidNTupleIndex;
NTupleSize_t fEnd = kInvalidNTupleIndex;

public:
class RIterator {
Expand All @@ -67,7 +67,9 @@ public:
bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
};

RNTupleGlobalRange() = default;
RNTupleGlobalRange(NTupleSize_t start, NTupleSize_t end) : fStart(start), fEnd(end) {}

RIterator begin() { return RIterator(fStart); }
RIterator end() { return RIterator(fEnd); }
NTupleSize_t size() { return fEnd - fStart; }
Expand All @@ -84,9 +86,10 @@ public:
// clang-format on
class RNTupleClusterRange {
private:
const DescriptorId_t fClusterId;
const ClusterSize_t::ValueType fStart;
const ClusterSize_t::ValueType fEnd;
DescriptorId_t fClusterId = kInvalidDescriptorId;
ClusterSize_t::ValueType fStart = kInvalidClusterIndex;
ClusterSize_t::ValueType fEnd = kInvalidClusterIndex;

public:
class RIterator {
private:
Expand All @@ -111,8 +114,10 @@ public:
bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
};

RNTupleClusterRange() = default;
RNTupleClusterRange(DescriptorId_t clusterId, ClusterSize_t::ValueType start, ClusterSize_t::ValueType end)
: fClusterId(clusterId), fStart(start), fEnd(end) {}

RIterator begin() { return RIterator(RClusterIndex(fClusterId, fStart)); }
RIterator end() { return RIterator(RClusterIndex(fClusterId, fEnd)); }
};
Expand Down Expand Up @@ -349,14 +354,14 @@ class RNTupleCollectionView {

private:
Internal::RPageSource *fSource;
RField<RNTupleCardinality<std::uint64_t>> fField;
std::unique_ptr<RField<RNTupleCardinality<std::uint64_t>>> fField; // TODO(jblomer): fix moving RField
RFieldBase::RValue fValue;

RNTupleCollectionView(DescriptorId_t fieldId, const std::string &fieldName, Internal::RPageSource *source)
: fSource(source), fField(fieldName), fValue(fField.CreateValue())
: fSource(source), fField(new RField<RNTupleCardinality<std::uint64_t>>(fieldName)), fValue(fField->CreateValue())
{
fField.SetOnDiskId(fieldId);
Internal::CallConnectPageSourceOnField(fField, *source);
fField->SetOnDiskId(fieldId);
Internal::CallConnectPageSourceOnField(*fField, *source);
}

static RNTupleCollectionView Create(DescriptorId_t fieldId, Internal::RPageSource *source)
Expand All @@ -377,10 +382,10 @@ private:
DescriptorId_t GetFieldId(std::string_view fieldName)
{
auto descGuard = fSource->GetSharedDescriptorGuard();
auto fieldId = descGuard->FindFieldId(fieldName, fField.GetOnDiskId());
auto fieldId = descGuard->FindFieldId(fieldName, fField->GetOnDiskId());
if (fieldId == kInvalidDescriptorId) {
throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in collection '" +
descGuard->GetQualifiedFieldName(fField.GetOnDiskId()) + "'"));
descGuard->GetQualifiedFieldName(fField->GetOnDiskId()) + "'"));
}
return fieldId;
}
Expand All @@ -395,15 +400,15 @@ public:
RNTupleClusterRange GetCollectionRange(NTupleSize_t globalIndex) {
ClusterSize_t size;
RClusterIndex collectionStart;
fField.GetCollectionInfo(globalIndex, &collectionStart, &size);
fField->GetCollectionInfo(globalIndex, &collectionStart, &size);
return RNTupleClusterRange(collectionStart.GetClusterId(), collectionStart.GetIndex(),
collectionStart.GetIndex() + size);
}
RNTupleClusterRange GetCollectionRange(RClusterIndex clusterIndex)
{
ClusterSize_t size;
RClusterIndex collectionStart;
fField.GetCollectionInfo(clusterIndex, &collectionStart, &size);
fField->GetCollectionInfo(clusterIndex, &collectionStart, &size);
return RNTupleClusterRange(collectionStart.GetClusterId(), collectionStart.GetIndex(),
collectionStart.GetIndex() + size);
}
Expand Down
161 changes: 161 additions & 0 deletions tutorials/v7/ntuple/ntpl015_streaming_vector.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/// \file
/// \ingroup tutorial_ntuple
///
/// Example of a streaming vector: a special purpose container that reads large vectors piece-wise.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like the tutorial is lacking some explanation of the interplay between the various components. Especially in places like RStreamingVector::iterator constructor and operator*. We should probably point the user to also check the functioning of RNTupleCollectionView to understand better the rest of the tutorial

///
/// \macro_code
///
/// \date November 2024
/// \author Peter van Gemmeren, the ROOT Team

// NOTE: The RNTuple classes are experimental at this point.
// Functionality and interface are still subject to changes.

#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleReader.hxx>
#include <ROOT/RNTupleUtil.hxx>
#include <ROOT/RNTupleView.hxx>
#include <ROOT/RNTupleWriter.hxx>

#include <TRandom3.h>

#include <cstdint>
#include <iostream>
#include <vector>
#include <utility>

using namespace ROOT::Experimental;

constexpr char const *kFileName = "ntpl015_streaming_vector.root";
constexpr char const *kNTupleName = "ntpl";
constexpr char const *kFieldName = "LargeVector";
constexpr unsigned int kNEvents = 10;
constexpr unsigned int kVectorSize = 1000000;

Check failure on line 33 in tutorials/v7/ntuple/ntpl015_streaming_vector.C

View workflow job for this annotation

GitHub Actions / alma9 modules_off runtime_cxxmodules=Off

redefinition of 'kVectorSize' with a different type: 'const unsigned int' vs 'const Int_t' (aka 'const int')

void CreateRNTuple()
{
auto model = RNTupleModel::Create();
auto ptrLargeVector = model->MakeField<std::vector<std::uint32_t>>(kFieldName);
auto writer = RNTupleWriter::Recreate(std::move(model), kNTupleName, kFileName);

auto prng = std::make_unique<TRandom3>();
prng->SetSeed();

for (NTupleSize_t i = 0; i < kNEvents; i++) {
ptrLargeVector->clear();
for (std::size_t j = 0; j < kVectorSize; j++)
ptrLargeVector->emplace_back(prng->Integer(-1));
writer->Fill();
}
std::cout << "RNTuple written" << std::endl;
}

/*
* ==================================================================================================
*/

void ReadRNTupleSimple()
{
auto reader = RNTupleReader::Open(kNTupleName, kFileName);

const auto nEntries = reader->GetNEntries();
std::cout << "Simple reading, found " << nEntries << " entries" << std::endl;

auto ptrLargeVector = reader->GetModel().GetDefaultEntry().GetPtr<std::vector<std::uint32_t>>(kFieldName);
for (NTupleSize_t i = 0; i < nEntries; i++) {
reader->LoadEntry(i);

const auto vectorSize = ptrLargeVector->size();
uint64_t sum = 0;
for (auto val : *ptrLargeVector)
sum += val;

std::cout << "Size and sum of vector: " << vectorSize << " " << sum << std::endl;
}
std::cout << "RNTuple simple read" << std::endl;
}

/*
* ==================================================================================================
*/

template <class T>
class RStreamingVector {
RNTupleCollectionView fVectorView;
RNTupleView<T> fItemView;
RNTupleClusterRange fRange;
NTupleSize_t fEntry{0};
NTupleSize_t fSize{0};

public:
class iterator {
RNTupleClusterRange::RIterator fIndex;
RNTupleView<T> &fView;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should store a pointer to make the type copyable

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be copyable though? To me the semantics would be a bit fuzzy (would the copy automatically get the same window of entries loaded in? Also, when you copy a std::vector you do a deep copy, but in this case to my understanding it wouldn't)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't that hint that the class is more of a RStreamingVectorView and/or RVectorStreamingView?


public:
iterator(RNTupleClusterRange::RIterator index, RNTupleView<T> &view) : fIndex(index), fView(view) {}
~iterator() = default;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have to define this destructor, we should follow rule of five


iterator operator++(int) /* postfix */
{
auto r = *this;
++(*this);
return r;
}
iterator &operator++() /* prefix */
{
++fIndex;
return *this;
}
const T &operator*() { return fView.operator()(*fIndex); }
const T *operator->() { return &fView.operator()(*fIndex); }
bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
};

RStreamingVector(RNTupleCollectionView &&vectorView)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should take the view by value (cf previous discussion on taking move-only types by value vs by xvalue reference)

: fVectorView(std::move(vectorView)), fItemView(fVectorView.GetView<T>("_0"))
{
}
~RStreamingVector() = default;
Comment on lines +116 to +120
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar comment about rule of five


NTupleSize_t size() const { return fSize; }

iterator begin() { return iterator(fRange.begin(), fItemView); }
iterator end() { return iterator(fRange.end(), fItemView); }

void LoadEntry(NTupleSize_t entry)
{
fEntry = entry;
fRange = fVectorView.GetCollectionRange(fEntry);
fSize = fVectorView.operator()(fEntry);
}
};

void ReadRNTupleStreamingVector()
{
auto reader = RNTupleReader::Open(kNTupleName, kFileName);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should open the reader with disabled cluster cache, otherwise all pages will still send up in memory I think


const auto nEntries = reader->GetNEntries();
std::cout << "Streamed reading, found " << nEntries << " entries" << std::endl;

RStreamingVector<std::uint32_t> streamingVector(reader->GetCollectionView(kFieldName));
for (NTupleSize_t i = 0; i < nEntries; i++) {
streamingVector.LoadEntry(i);

const auto vectorSize = streamingVector.size();
uint64_t sum = 0;
for (auto val : streamingVector)
sum += val;

std::cout << "Size and sum of vector: " << vectorSize << " " << sum << std::endl;
}
std::cout << "RNTuple streaming read" << std::endl;
}

void ntpl015_streaming_vector()
{
CreateRNTuple();
ReadRNTupleSimple();
ReadRNTupleStreamingVector();
}
Loading