Skip to content

Commit 48d1ff7

Browse files
committed
[rfile] add GetKeys() methods
1 parent d98d9b9 commit 48d1ff7

File tree

3 files changed

+452
-30
lines changed

3 files changed

+452
-30
lines changed

io/io/inc/ROOT/RFile.hxx

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
#include <ROOT/RError.hxx>
1212

1313
#include <memory>
14+
#include <iostream>
1415
#include <string_view>
1516
#include <typeinfo>
1617

1718
class TFile;
19+
class TIterator;
1820
class TKey;
1921

2022
namespace ROOT {
@@ -29,6 +31,95 @@ ROOT::RLogChannel &RFileLog();
2931

3032
} // namespace Internal
3133

34+
/**
35+
\class ROOT::Experimental::RKeyInfo
36+
\ingroup RFile
37+
\brief Information about an RFile object's Key.
38+
39+
Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type
40+
etc.
41+
Querying this information can be done via RFile::ListKeys(). Reading an object's Key
42+
doesn't deserialize the full object, so it's a relatively lightweight operation.
43+
*/
44+
// Plain value type describing one key of a file. Instances are filled in by
// RFileKeyIterable::RIterator::operator*().
struct RKeyInfo {
45+
// Kind of entry the key refers to.
enum class ECategory : std::uint16_t {
46+
// Value of a default-constructed RKeyInfo (see fCategory below).
kInvalid,
47+
// The key's class name is neither "TDirectory" nor "TDirectoryFile".
kObject,
48+
// The key's class name is "TDirectory" or "TDirectoryFile".
kDirectory
49+
};
50+
51+
// Path of the entry inside the file, built from the enclosing directory path and the key name, '/'-separated.
std::string fName;
52+
// Title stored in the key (TKey::GetTitle()).
std::string fTitle;
53+
// Class name stored in the key (TKey::GetClassName()).
std::string fClassName;
54+
// Cycle number stored in the key (TKey::GetCycle()).
std::uint16_t fCycle = 0;
55+
// Whether the key refers to a directory or to a regular object.
ECategory fCategory = ECategory::kInvalid;
56+
};
57+
58+
/// The iterable returned by RFile::ListKeys()
59+
// Lightweight range object: it only stores the listing parameters. The actual traversal state lives in the
// RIterator objects created by begin().
class RFileKeyIterable final {
60+
using Pattern_t = std::string;
61+
62+
// File whose keys are listed. Not owned by this class.
TFile *fFile = nullptr;
63+
// Path prefix that listed keys must start with (copy of the `rootDir` constructor argument).
Pattern_t fPattern;
64+
// Bitmask of RFile::EListKeyFlags values.
std::uint32_t fFlags = 0;
65+
66+
public:
67+
// Forward iterator over the keys of a file. It keeps a stack of per-directory TIterators, so that
// subdirectories can be visited without recursion.
class RIterator {
68+
friend class RFileKeyIterable;
69+
70+
// One level of the traversal: the iterator over the key list of a directory plus the path of that directory.
struct RIterStackElem {
71+
// This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class
72+
// to be copy-constructible.
73+
std::shared_ptr<TIterator> fIter;
74+
// Path of the directory that fIter iterates over (empty for the top-level directory).
std::string fDirPath;
75+
76+
// Outlined to avoid including TIterator.h
77+
RIterStackElem(TIterator *it, const std::string &path = "");
78+
// Outlined to avoid including TIterator.h
79+
~RIterStackElem();
80+
81+
// fDirPath doesn't need to be compared because it's implied by fIter.
82+
bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; }
83+
};
84+
85+
// Traversal stack: back() is the directory currently being iterated. An empty stack means "end of iteration".
std::vector<RIterStackElem> fIterStack;
86+
// Path prefix used to filter the returned keys.
Pattern_t fPattern;
87+
// Key the iterator currently points to; set by Advance(), null when there is none.
const TKey *fCurKey = nullptr;
88+
// Number of path components of fPattern (0 for an empty pattern); computed in the constructor.
std::uint16_t fRootDirNesting = 0;
89+
// Bitmask of RFile::EListKeyFlags values.
std::uint32_t fFlags = 0;
90+
91+
// Moves to the next key accepted by fPattern/fFlags, or empties fIterStack if there is none.
void Advance();
92+
93+
// NOTE: `iter` here is an owning pointer (or null)
94+
RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags);
95+
96+
public:
97+
using iterator = RIterator;
98+
using iterator_category = std::forward_iterator_tag;
99+
using difference_type = std::ptrdiff_t;
100+
using value_type = RKeyInfo;
101+
using pointer = const value_type *;
102+
using reference = const value_type &;
103+
104+
// Pre-increment: steps to the next matching key.
iterator &operator++()
105+
{
106+
Advance();
107+
return *this;
108+
}
109+
// Returns (by value) the information about the current key; throws ROOT::RException if there is no current key.
value_type operator*();
110+
bool operator!=(const iterator &rh) const { return !(*this == rh); }
111+
// Two iterators compare equal when their stacks hold the same TIterator objects; in particular every
// exhausted iterator (empty stack) compares equal to end().
bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; }
112+
};
113+
114+
// Stores the listing parameters; no file access happens until begin() is called.
RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags)
115+
: fFile(file), fPattern(std::string(rootDir)), fFlags(flags)
116+
{
117+
}
118+
119+
RIterator begin() const;
120+
RIterator end() const;
121+
};
122+
32123
/**
33124
\class ROOT::Experimental::RFile
34125
\ingroup RFile
@@ -126,6 +217,12 @@ class RFile final {
126217
TKey *GetTKey(std::string_view path) const;
127218

128219
public:
220+
// Bit flags accepted by ListKeys(); combine them with bitwise OR.
enum EListKeyFlags {
221+
// Include keys of non-directory objects in the listing.
kListObjects = 1 << 0,
222+
// Include keys of directories in the listing.
kListDirs = 1 << 1,
223+
// Also list the content of subdirectories instead of immediate children only.
kListRecursive = 1 << 2,
224+
};
225+
129226
// This is arbitrary, but it's useful to avoid pathological cases
130227
static constexpr int kMaxPathNesting = 1000;
131228

@@ -196,6 +293,23 @@ public:
196293

197294
/// Flushes the RFile if needed and closes it, disallowing any further reading or writing.
198295
void Close();
296+
297+
/// Returns an iterable over the keys (as RKeyInfo) of the objects written into this RFile starting at path "rootPath".
298+
/// The returned paths are always "absolute" paths: they are not relative to `rootPath`.
299+
/// With the default flags, keys of directories are not returned: only those of leaf objects are.
300+
/// If `rootPath` is the path of a leaf object, only `rootPath` itself will be returned.
301+
/// `flags` is a bitmask of EListKeyFlags values specifying the listing mode.
302+
/// If `(flags & kListObjects) != 0`, the listing will include keys of non-directory objects (default);
303+
/// If `(flags & kListDirs) != 0`, the listing will include keys of directory objects;
304+
/// If `(flags & kListRecursive) != 0`, the listing will recurse on all subdirectories of `rootPath` (default),
305+
/// otherwise it will only list immediate children of `rootPath`.
306+
RFileKeyIterable ListKeys(std::string_view rootPath = "", std::uint32_t flags = kListObjects | kListRecursive) const
307+
{
308+
return RFileKeyIterable(fFile.get(), rootPath, flags);
309+
}
310+
311+
/// Prints the internal structure of this RFile to the given stream.
312+
void Print(std::ostream &out = std::cout) const;
199313
};
200314

201315
} // namespace Experimental

io/io/src/RFile.cxx

Lines changed: 141 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
#include <Byteswap.h>
1414
#include <TError.h>
1515
#include <TFile.h>
16+
#include <TIterator.h>
1617
#include <TKey.h>
18+
#include <TList.h>
1719
#include <TROOT.h>
1820

1921
#include <algorithm>
@@ -169,18 +171,14 @@ static std::string ValidateAndNormalizePath(std::string &path)
169171

170172
/////////////////////////////////////////////////////////////////////////////////////////////////
171173

172-
RFile::RFile(std::unique_ptr<TFile> file) : fFile(std::move(file)) {}
173-
174-
RFile::~RFile() = default;
175-
176174
std::unique_ptr<RFile> RFile::Open(std::string_view path)
177175
{
178176
TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe?
179177
auto tfile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "READ_WITHOUT_GLOBALREGISTRATION"));
180178
if (!tfile || tfile->IsZombie())
181179
throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for reading"));
182180

183-
if (tfile->IsRaw())
181+
if (tfile->IsRaw() || !tfile->IsBinary())
184182
throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file"));
185183

186184
auto rfile = std::unique_ptr<RFile>(new RFile(std::move(tfile)));
@@ -194,7 +192,7 @@ std::unique_ptr<RFile> RFile::Update(std::string_view path)
194192
if (!tfile || tfile->IsZombie())
195193
throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for updating"));
196194

197-
if (tfile->IsRaw())
195+
if (tfile->IsRaw() || !tfile->IsBinary())
198196
throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file"));
199197

200198
auto rfile = std::unique_ptr<RFile>(new RFile(std::move(tfile)));
@@ -208,13 +206,17 @@ std::unique_ptr<RFile> RFile::Recreate(std::string_view path)
208206
if (!tfile || tfile->IsZombie())
209207
throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for writing"));
210208

211-
if (tfile->IsRaw())
209+
if (tfile->IsRaw() || !tfile->IsBinary())
212210
throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file"));
213211

214212
auto rfile = std::unique_ptr<RFile>(new RFile(std::move(tfile)));
215213
return rfile;
216214
}
217215

216+
// Takes ownership of the given TFile.
RFile::RFile(std::unique_ptr<TFile> file) : fFile(std::move(file)) {}
217+
218+
// Defaulted out of line; NOTE(review): presumably because TFile is only forward-declared in the header
// while fFile is a std::unique_ptr<TFile> - confirm.
RFile::~RFile() = default;
219+
218220
TKey *RFile::GetTKey(std::string_view path) const
219221
{
220222
// In RFile, differently from TFile, when dealing with a path like "a/b/c", we always consider it to mean
@@ -361,6 +363,138 @@ void RFile::PutUntyped(std::string_view pathSV, const std::type_info &type, cons
361363
}
362364
}
363365

366+
// Wraps `it` into the shared_ptr member fIter (which thereby becomes its owner) and stores a copy of `path`.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::RIterStackElem(TIterator *it, const std::string &path)
367+
: fIter(it), fDirPath(path)
368+
{
369+
}
370+
371+
// Defined here rather than in the header, where TIterator is only forward-declared.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::~RIterStackElem() = default;
372+
373+
/// Builds an iterator over the keys reachable from `iter`.
/// \param iter Owning pointer to the iterator over the top-level key list, or null to build a
///             past-the-end iterator (its stack stays empty).
/// \param pattern Path prefix the returned keys must start with; may be empty.
/// \param flags Bitmask of RFile::EListKeyFlags values.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags)
   : fPattern(pattern), fFlags(flags)
{
   if (!iter)
      return;

   fIterStack.emplace_back(iter);

   if (!fPattern.empty()) {
      // The nesting level is the number of path components of the pattern. A trailing '/' does not add a
      // component: "dir" and "dir/" denote the same directory.
      const auto nSeparators = std::count(fPattern.begin(), fPattern.end(), '/');
      const bool endsWithSeparator = fPattern.back() == '/';
      fRootDirNesting = static_cast<std::uint16_t>(nSeparators + (endsWithSeparator ? 0 : 1));
   }

   // Move onto the first key accepted by the pattern and the flags (if any).
   Advance();
}
391+
392+
/// Returns an iterator positioned on the first key matching the pattern and flags of this iterable.
ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::begin() const
{
   // The iterator created here is an owning pointer; the RIterator takes over its ownership.
   TIterator *topLevelKeys = fFile->GetListOfKeys()->MakeIterator();
   return RIterator(topLevelKeys, fPattern, fFlags);
}
396+
397+
/// Returns the past-the-end iterator: it is built from a null TIterator, so its stack is empty.
ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::end() const
{
   return RIterator(nullptr, fPattern, fFlags);
}
401+
402+
void ROOT::Experimental::RFileKeyIterable::RIterator::Advance()
403+
{
404+
fCurKey = nullptr;
405+
406+
const bool recursive = fFlags & RFile::kListRecursive;
407+
const bool includeObj = fFlags & RFile::kListObjects;
408+
const bool includeDirs = fFlags & RFile::kListDirs;
409+
410+
// We only want to return keys that refer to user objects, not internal ones, therefore we skip
411+
// all keys that have internal class names.
412+
while (!fIterStack.empty()) {
413+
auto &[iter, dirPath] = fIterStack.back();
414+
assert(iter);
415+
TObject *keyObj = iter->Next();
416+
if (!keyObj) {
417+
// reached end of the iteration
418+
fIterStack.pop_back();
419+
continue;
420+
}
421+
422+
assert(keyObj->IsA() == TClass::GetClass<TKey>());
423+
auto key = static_cast<TKey *>(keyObj);
424+
425+
const auto dirSep = (dirPath.empty() ? "" : "/");
426+
const auto isDir =
427+
strcmp(key->GetClassName(), "TDirectory") == 0 || strcmp(key->GetClassName(), "TDirectoryFile") == 0;
428+
429+
if (isDir) {
430+
TDirectory *dir = key->ReadObject<TDirectory>();
431+
TIterator *innerIter = dir->GetListOfKeys()->MakeIterator();
432+
assert(innerIter);
433+
fIterStack.emplace_back(innerIter, dirPath + dirSep + dir->GetName());
434+
if (!includeDirs)
435+
continue;
436+
} else if (!includeObj) {
437+
continue;
438+
}
439+
440+
// Reconstruct the full path of the key
441+
const auto &fullPath = dirPath + dirSep + key->GetName();
442+
const auto nesting = fIterStack.size() - 1;
443+
444+
// Skip key if it's not a child of root dir
445+
if (!ROOT::StartsWith(fullPath, fPattern))
446+
continue;
447+
448+
// Check that we are in the same directory as "rootDir".
449+
// Note that for directories we list both the root dir and the immediate children (in non-recursive mode).
450+
if (!recursive && nesting != fRootDirNesting && (!isDir || nesting != fRootDirNesting + 1))
451+
continue;
452+
453+
// All checks passed: return this key.
454+
assert(!fullPath.empty());
455+
fCurKey = key;
456+
break;
457+
}
458+
}
459+
460+
/// Returns the information about the key the iterator currently points to.
/// \throws ROOT::RException if the iterator has no current key (e.g. it is the past-the-end iterator).
ROOT::Experimental::RKeyInfo ROOT::Experimental::RFileKeyIterable::RIterator::operator*()
{
   if (fIterStack.empty() || !fCurKey)
      throw ROOT::RException(R__FAIL("tried to dereference an invalid iterator"));

   const char *className = fCurKey->GetClassName();
   const bool isDirectory = strcmp(className, "TDirectory") == 0 || strcmp(className, "TDirectoryFile") == 0;

   // For a directory key, Advance() has already pushed the stack element of the directory itself, so the
   // path on top of the stack is the full path of the directory. For any other key it is the path of the
   // directory containing it.
   const std::string &topPath = fIterStack.back().fDirPath;

   RKeyInfo info;
   info.fName = topPath;
   if (isDirectory) {
      info.fCategory = RKeyInfo::ECategory::kDirectory;
   } else {
      info.fCategory = RKeyInfo::ECategory::kObject;
      if (!topPath.empty())
         info.fName += "/";
      info.fName += fCurKey->GetName();
   }
   info.fTitle = fCurKey->GetTitle();
   info.fClassName = className;
   info.fCycle = fCurKey->GetCycle();
   return info;
}
483+
484+
void RFile::Print(std::ostream &out) const
485+
{
486+
std::vector<RKeyInfo> keys;
487+
auto keysIter = ListKeys();
488+
for (const auto &key : keysIter) {
489+
keys.emplace_back(key);
490+
}
491+
492+
std::sort(keys.begin(), keys.end(), [](const auto &a, const auto &b) { return a.fName < b.fName; });
493+
for (const auto &key : keys) {
494+
out << key.fClassName << " " << key.fName << ";" << key.fCycle << ": \"" << key.fTitle << "\"\n";
495+
}
496+
}
497+
364498
size_t RFile::Flush()
365499
{
366500
return fFile->Write();

0 commit comments

Comments
 (0)