Skip to content

Commit 20cba70

Browse files
committed
[ntuple] Add some internal utilities to the storage layer
Will be used by the RNTupleAttributes.
1 parent 916ea81 commit 20cba70

File tree

9 files changed

+113
-12
lines changed

9 files changed

+113
-12
lines changed

tree/ntuple/inc/ROOT/RMiniFile.hxx

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,16 @@ class TVirtualStreamerInfo;
3232

3333
namespace ROOT {
3434

35+
class RNTupleWriteOptions;
36+
3537
namespace Internal {
38+
39+
class RNTupleFileWriter;
40+
class RPageSource;
3641
class RRawFile;
37-
}
3842

39-
class RNTupleWriteOptions;
43+
TDirectory *GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer);
4044

41-
namespace Internal {
4245
/// Holds status information of an open ROOT file during writing
4346
struct RTFileControlBlock;
4447

@@ -53,6 +56,8 @@ RNTuple data keys.
5356
*/
5457
// clang-format on
5558
class RMiniFileReader {
59+
friend ROOT::Internal::RPageSource;
60+
5661
private:
5762
/// The raw file used to read byte ranges
5863
ROOT::Internal::RRawFile *fRawFile = nullptr;
@@ -68,9 +73,6 @@ private:
6873
/// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name
6974
/// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`)
7075
RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath);
71-
/// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
72-
RResult<RNTuple>
73-
GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
7476

7577
/// Searches for a key with the given name and type in the key index of the directory starting at offsetDir.
7678
/// The offset points to the start of the TDirectory DATA section, without the key and without the name and title
@@ -84,6 +86,9 @@ public:
8486
explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile);
8587
/// Extracts header and footer location for the RNTuple identified by ntupleName
8688
RResult<RNTuple> GetNTuple(std::string_view ntupleName);
89+
/// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
90+
RResult<RNTuple>
91+
GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
8792
/// Reads a given byte range from the file into the provided memory buffer.
8893
/// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs,
8994
/// whose addresses are listed at the end of the first chunk.
@@ -109,6 +114,8 @@ A stand-alone version of RNTuple can remove the TFile based writer.
109114
*/
110115
// clang-format on
111116
class RNTupleFileWriter {
117+
friend TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer);
118+
112119
public:
113120
/// The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
114121
static constexpr std::size_t kBlobKeyLen = 42;
@@ -254,7 +261,7 @@ public:
254261
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
255262
/// Ensures that the streamer info records passed as argument are written to the file
256263
void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos);
257-
/// Writes the RNTuple key to the file so that the header and footer keys can be found
264+
/// Writes the RNTuple key to the file so that the header and footer keys can be found.
258265
void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose);
259266
};
260267

tree/ntuple/inc/ROOT/RNTupleFillContext.hxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ private:
103103
std::size_t FillImpl(Entry &entry)
104104
{
105105
ROOT::RNTupleFillStatus status;
106-
FillNoFlush(entry, status);
106+
FillNoFlushImpl(entry, status);
107107
if (status.ShouldFlushCluster())
108108
FlushCluster();
109109
return status.GetLastEntrySize();
@@ -114,6 +114,9 @@ private:
114114
RNTupleFillContext &operator=(const RNTupleFillContext &) = delete;
115115

116116
public:
117+
RNTupleFillContext(RNTupleFillContext &&) = default;
118+
RNTupleFillContext &operator=(RNTupleFillContext &&) = default;
119+
117120
~RNTupleFillContext();
118121

119122
/// Fill an entry into this context, but don't commit the cluster. The calling code must pass an RNTupleFillStatus

tree/ntuple/inc/ROOT/RPageSinkBuf.hxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ public:
148148
void CommitDatasetImpl() final;
149149

150150
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
151+
152+
TDirectory *GetUnderlyingDirectory() const final { return fInnerSink->GetUnderlyingDirectory(); }
151153
}; // RPageSinkBuf
152154

153155
} // namespace Internal

tree/ntuple/inc/ROOT/RPageStorage.hxx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ namespace ROOT {
4747
class RNTupleModel;
4848

4949
namespace Internal {
50-
51-
class RPageAllocator;
5250
class RColumn;
51+
class RMiniFileReader;
52+
class RPageAllocator;
5353
struct RNTupleModelChangeset;
5454

5555
enum class EPageStorageType {
@@ -313,6 +313,8 @@ public:
313313

314314
virtual ROOT::NTupleSize_t GetNEntries() const = 0;
315315

316+
virtual TDirectory *GetUnderlyingDirectory() const { return nullptr; }
317+
316318
/// Physically creates the storage container to hold the ntuple (e.g., a keys a TFile or an S3 bucket)
317319
/// Init() associates column handles to the columns referenced by the model
318320
void Init(RNTupleModel &model)
@@ -808,6 +810,8 @@ public:
808810
virtual std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
809811
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) = 0;
810812

813+
virtual RMiniFileReader *GetUnderlyingReader() { return nullptr; }
814+
811815
/// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
812816
/// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
813817
/// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the

tree/ntuple/inc/ROOT/RPageStorageFile.hxx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ public:
9898
RPageSinkFile(RPageSinkFile &&) = default;
9999
RPageSinkFile &operator=(RPageSinkFile &&) = default;
100100
~RPageSinkFile() override;
101+
102+
virtual TDirectory *GetUnderlyingDirectory() const final { return Internal::GetUnderlyingDirectory(*fWriter); }
103+
104+
ROOT::Internal::RNTupleFileWriter *GetUnderlyingWriter() const { return fWriter.get(); }
101105
}; // class RPageSinkFile
102106

103107
// clang-format off
@@ -149,6 +153,8 @@ private:
149153
std::unique_ptr<ROOT::Internal::RCluster>
150154
PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
151155

156+
RMiniFileReader *GetUnderlyingReader() final { return &fReader; }
157+
152158
protected:
153159
void LoadStructureImpl() final;
154160
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
@@ -173,6 +179,11 @@ public:
173179
RPageSourceFile &operator=(RPageSourceFile &&) = delete;
174180
~RPageSourceFile() override;
175181

182+
/// Creates a new PageSourceFile using the same underlying file as this but referring to a different RNTuple,
183+
/// represented by `anchor`.
184+
std::unique_ptr<RPageSourceFile>
185+
OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
186+
176187
void
177188
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
178189

tree/ntuple/src/RMiniFile.cxx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,3 +1608,11 @@ void ROOT::Internal::RNTupleFileWriter::WriteTFileSkeleton(int defaultCompressio
16081608
fileSimple.Write(&padding, sizeof(padding));
16091609
fileSimple.fKeyOffset = fileSimple.fFilePos;
16101610
}
1611+
1612+
TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer)
1613+
{
1614+
if (auto *proper = std::get_if<ROOT::Internal::RNTupleFileWriter::RFileProper>(&writer.fFile)) {
1615+
return proper->fDirectory;
1616+
}
1617+
return nullptr;
1618+
}

tree/ntuple/src/RPageStorage.cxx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <ROOT/RPageAllocator.hxx>
2525
#include <ROOT/RPageSinkBuf.hxx>
2626
#include <ROOT/RPageStorageFile.hxx>
27+
#include <ROOT/RNTupleReader.hxx>
2728
#ifdef R__ENABLE_DAOS
2829
#include <ROOT/RPageStorageDaos.hxx>
2930
#endif
@@ -1198,8 +1199,8 @@ void ROOT::Internal::RPagePersistentSink::CommitStagedClusters(std::span<RStaged
11981199
if (!columnInfo.fIsSuppressed)
11991200
continue;
12001201
const auto colId = columnInfo.fPageRange.GetPhysicalColumnId();
1201-
// For suppressed columns, we need to reset the first element index to the first element of the next (upcoming)
1202-
// cluster. This information has been determined for the committed cluster descriptor through
1202+
// For suppressed columns, we need to reset the first element index to the first element of the next
1203+
// (upcoming) cluster. This information has been determined for the committed cluster descriptor through
12031204
// CommitSuppressedColumnRanges(), so we can use the information from the descriptor.
12041205
const auto &columnRangeFromDesc = clusterBuilder.GetColumnRange(colId);
12051206
fOpenColumnRanges[colId].SetFirstElementIndex(columnRangeFromDesc.GetFirstElementIndex() +

tree/ntuple/src/RPageStorageFile.cxx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,15 @@ ROOT::Internal::RPageSourceFile::CreateFromAnchor(const RNTuple &anchor, const R
305305
return pageSource;
306306
}
307307

308+
std::unique_ptr<ROOT::Internal::RPageSourceFile>
309+
ROOT::Internal::RPageSourceFile::OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options)
310+
{
311+
auto pageSource = std::make_unique<RPageSourceFile>("", fFile->Clone(), options);
312+
pageSource->fAnchor = anchor;
313+
pageSource->fNTupleName = pageSource->fDescriptorBuilder.GetDescriptor().GetName();
314+
return pageSource;
315+
}
316+
308317
ROOT::Internal::RPageSourceFile::~RPageSourceFile() = default;
309318

310319
void ROOT::Internal::RPageSourceFile::LoadStructureImpl()

tree/ntuple/test/ntuple_storage.cxx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,3 +1128,59 @@ TEST(RPageSinkFile, StreamerInfo)
11281128
}
11291129
FAIL() << "not all streamer infos found! ";
11301130
}
1131+
1132+
TEST(RPageSourceFile, OpenDifferentAnchor)
1133+
{
1134+
FileRaii fileGuard("test_ntuple_open_diff_anchor.root");
1135+
1136+
auto model = RNTupleModel::Create();
1137+
auto pF = model->MakeField<float>("f");
1138+
auto file = std::unique_ptr<TFile>(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE"));
1139+
{
1140+
auto writer = RNTupleWriter::Append(std::move(model), "ntpl1", *file);
1141+
for (auto i = 0; i < 100; ++i) {
1142+
*pF = i;
1143+
writer->Fill();
1144+
}
1145+
}
1146+
{
1147+
model = RNTupleModel::Create();
1148+
auto pI = model->MakeField<int>("i");
1149+
auto pC = model->MakeField<char>("c");
1150+
1151+
auto writer = RNTupleWriter::Append(std::move(model), "ntpl2", *file);
1152+
for (auto i = 0; i < 20; ++i) {
1153+
*pI = i;
1154+
*pC = i;
1155+
writer->Fill();
1156+
}
1157+
}
1158+
1159+
auto source = std::make_unique<RPageSourceFile>("ntpl1", fileGuard.GetPath(), RNTupleReadOptions());
1160+
source->Attach();
1161+
EXPECT_EQ(source->GetNEntries(), 100);
1162+
auto desc = source->GetSharedDescriptorGuard();
1163+
EXPECT_NE(desc->FindFieldId("f"), ROOT::kInvalidDescriptorId);
1164+
1165+
auto anchor2 = file->Get<ROOT::RNTuple>("ntpl2");
1166+
ASSERT_NE(anchor2, nullptr);
1167+
auto source2 = source->OpenWithDifferentAnchor(*anchor2);
1168+
source2->Attach();
1169+
EXPECT_EQ(source2->GetNEntries(), 20);
1170+
{
1171+
auto desc2 = source2->GetSharedDescriptorGuard();
1172+
EXPECT_EQ(desc2->FindFieldId("f"), ROOT::kInvalidDescriptorId);
1173+
EXPECT_NE(desc2->FindFieldId("i"), ROOT::kInvalidDescriptorId);
1174+
EXPECT_NE(desc2->FindFieldId("c"), ROOT::kInvalidDescriptorId);
1175+
}
1176+
1177+
source.reset();
1178+
// source2 should still be valid after dropping the first source.
1179+
EXPECT_EQ(source2->GetNEntries(), 20);
1180+
{
1181+
auto desc2 = source2->GetSharedDescriptorGuard();
1182+
EXPECT_EQ(desc2->FindFieldId("f"), ROOT::kInvalidDescriptorId);
1183+
EXPECT_NE(desc2->FindFieldId("i"), ROOT::kInvalidDescriptorId);
1184+
EXPECT_NE(desc2->FindFieldId("c"), ROOT::kInvalidDescriptorId);
1185+
}
1186+
}

0 commit comments

Comments
 (0)