Skip to content

Commit ed31e98

Browse files
committed
[rfile] add GetKeys() methods
1 parent cf0a710 commit ed31e98

File tree

3 files changed

+381
-31
lines changed

3 files changed

+381
-31
lines changed

io/io/inc/ROOT/RFile.hxx

Lines changed: 116 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
#include <ROOT/RError.hxx>
1212

1313
#include <memory>
14+
#include <iostream>
1415
#include <string_view>
1516
#include <typeinfo>
1617

17-
class TFile;
18+
class TIterator;
1819
class TKey;
20+
class TFile;
1921

2022
namespace ROOT {
2123
namespace Experimental {
@@ -29,6 +31,92 @@ ROOT::RLogChannel &RFileLog();
2931

3032
} // namespace Internal
3133

34+
/**
35+
\class ROOT::Experimental::RFileKeyInfo
36+
\ingroup RFile
37+
\brief Information about an RFile object's Key.
38+
39+
Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type
40+
etc.
41+
Querying this information can be done via RFile::GetKeys() or RFile::GetKeysNonRecursive(). Reading an object's Key
42+
doesn't deserialize the full object, so it's a relatively lightweight operation.
43+
*/
44+
struct RFileKeyInfo {
   /// Full path of the object inside the file: the path of its directory and the object name, joined by '/'.
   std::string fName;
   /// Title stored in the object's Key.
   std::string fTitle;
   /// Name of the object's class, as stored in its Key.
   std::string fClassName;
   /// Cycle number of the Key. Zero-initialized so that a default-constructed RFileKeyInfo never carries an
   /// indeterminate value.
   std::uint16_t fCycle = 0;
};
50+
51+
/// The iterable returned by RFile::GetKeys() and RFile::GetKeysNonRecursive()
52+
class RFileKeyIterable final {
53+
using Pattern_t = std::string;
54+
55+
TFile *fFile;
56+
Pattern_t fPattern;
57+
std::uint32_t fFlags = 0;
58+
59+
public:
60+
enum EFlags {
61+
kNone = 0,
62+
kRecursive = 1 << 0,
63+
};
64+
65+
class RIterator {
66+
friend class RFileKeyIterable;
67+
68+
struct RIterStackElem {
69+
// This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class
70+
// to be copy-constructible.
71+
std::shared_ptr<TIterator> fIter;
72+
std::string fDirPath;
73+
74+
RIterStackElem(TIterator *it, const std::string &path = "") : fIter(it), fDirPath(path) {}
75+
// NOTE: outlined to avoid including TIterator.h
76+
~RIterStackElem();
77+
78+
// fDirPath doesn't need to be compared because it's implied by fIter.
79+
bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; }
80+
};
81+
82+
std::vector<RIterStackElem> fIterStack;
83+
Pattern_t fPattern;
84+
const TKey *fCurKey = nullptr;
85+
std::uint16_t fRootDirNesting = 0;
86+
std::uint32_t fFlags = 0;
87+
88+
void Advance();
89+
90+
// NOTE: `iter` here is an owning pointer (or null)
91+
RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags);
92+
93+
public:
94+
using iterator = RIterator;
95+
using iterator_category = std::forward_iterator_tag;
96+
using difference_type = std::ptrdiff_t;
97+
using value_type = RFileKeyInfo;
98+
using pointer = const value_type *;
99+
using reference = const value_type &;
100+
101+
iterator &operator++()
102+
{
103+
Advance();
104+
return *this;
105+
}
106+
value_type operator*();
107+
bool operator!=(const iterator &rh) const { return !(*this == rh); }
108+
bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; }
109+
};
110+
111+
RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags)
112+
: fFile(file), fPattern(std::string(rootDir)), fFlags(flags)
113+
{
114+
}
115+
116+
RIterator begin() const;
117+
RIterator end() const;
118+
};
119+
32120
/**
33121
\class ROOT::Experimental::RFile
34122
\ingroup RFile
@@ -103,8 +191,7 @@ class RFile final {
103191

104192
std::unique_ptr<TFile> fFile;
105193

106-
// Outlined to avoid including TFile.h
107-
explicit RFile(std::unique_ptr<TFile> file);
194+
// Takes ownership of `file`.
// NOTE(review): this constructor used to be defined out of line so that TFile could remain an incomplete type in
// this header. Defined inline, it may require ~unique_ptr<TFile> (and therefore a complete TFile) in every
// translation unit that includes this header -- confirm that this still compiles without including TFile.h.
explicit RFile(std::unique_ptr<TFile> file) : fFile(std::move(file)) {}
108195

109196
/// Gets object `path` from the file and returns an **owning** pointer to it.
110197
/// The caller should immediately wrap it into a unique_ptr of the type described by `type`.
@@ -147,7 +234,7 @@ public:
147234

148235
///// Instance methods /////
149236

150-
// Outlined to avoid including TFile.h
237+
// NOTE: outlined to avoid including TFile.h
151238
~RFile();
152239

153240
/// Retrieves an object from the file.
@@ -196,6 +283,31 @@ public:
196283

197284
/// Flushes the RFile if needed and closes it, disallowing any further reading or writing.
198285
void Close();
286+
287+
/// Returns an iterable over all paths of objects written into this RFile starting at path "rootPath".
288+
/// The returned paths are always "absolute" paths: they are not relative to `rootPath`.
289+
/// Keys relative to directories are not returned: only those relative to leaf objects are.
290+
/// If `rootPath` is the path of a leaf object, only `rootPath` itself will be returned.
291+
/// This recurses on all the subdirectories of `rootPath`. If you only want the immediate children of `rootPath`,
292+
/// use GetKeysNonRecursive().
293+
RFileKeyIterable GetKeys(std::string_view rootPath = "") const
294+
{
295+
return RFileKeyIterable(fFile.get(), rootPath, RFileKeyIterable::kRecursive);
296+
}
297+
298+
/// Returns an iterable over all paths of objects written into this RFile contained in the directory "rootPath".
299+
/// The returned paths are always "absolute" paths: they are not relative to `rootPath`.
300+
/// Keys relative to directories are not returned: only those relative to leaf objects are.
301+
/// If `rootPath` is the path of a leaf object, only `rootPath` itself will be returned.
302+
/// This only returns the immediate children of `rootPath`. If you want to recurse into the subdirectories of
303+
/// `rootPath`, use GetKeys().
304+
RFileKeyIterable GetKeysNonRecursive(std::string_view rootPath = "") const
305+
{
306+
return RFileKeyIterable(fFile.get(), rootPath, RFileKeyIterable::kNone);
307+
}
308+
309+
/// Prints the internal structure of this RFile to the given stream.
310+
void Print(std::ostream &out = std::cout) const;
199311
};
200312

201313
} // namespace Experimental

io/io/src/RFile.cxx

Lines changed: 118 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
#include <Byteswap.h>
1414
#include <TError.h>
1515
#include <TFile.h>
16+
#include <TIterator.h>
1617
#include <TKey.h>
18+
#include <TList.h>
1719
#include <TROOT.h>
1820

1921
#include <algorithm>
@@ -184,10 +186,6 @@ static std::string ValidateAndNormalizePath(std::string &path)
184186

185187
/////////////////////////////////////////////////////////////////////////////////////////////////
186188

187-
RFile::RFile(std::unique_ptr<TFile> file) : fFile(std::move(file)) {}
188-
189-
RFile::~RFile() = default;
190-
191189
std::unique_ptr<RFile> RFile::Open(std::string_view path)
192190
{
193191
CheckExtension(path);
@@ -227,6 +225,8 @@ std::unique_ptr<RFile> RFile::Recreate(std::string_view path)
227225
return rfile;
228226
}
229227

228+
// Defined in this file, after TFile.h has been included, so that the std::unique_ptr<TFile> member is destroyed
// where TFile is a complete type.
RFile::~RFile() = default;
229+
230230
TKey *RFile::GetTKey(std::string_view path) const
231231
{
232232
// In RFile, differently from TFile, when dealing with a path like "a/b/c", we always consider it to mean
@@ -373,6 +373,120 @@ void RFile::PutUntyped(std::string_view pathSV, const std::type_info &type, cons
373373
}
374374
}
375375

376+
// Defined here rather than in the header so that the header does not need to include TIterator.h.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::~RIterStackElem() = default;
377+
378+
/// Builds an iterator over the keys reachable from `iter`.
/// \param iter Owning pointer to the iterator over the top-level keys, or null to build the end iterator.
/// \param pattern Path of the directory the iteration is restricted to (may be empty).
/// \param flags Bitwise combination of RFileKeyIterable::EFlags.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags)
   : fPattern(pattern), fFlags(flags)
{
   // A null `iter` yields the end iterator: the stack stays empty and there is nothing else to set up.
   if (!iter)
      return;

   fIterStack.emplace_back(iter);

   if (!pattern.empty()) {
      // "dir" and "dir/" designate the same directory, like in virtually all filesystem operations: each '/'
      // accounts for one nesting level, plus one more level when the trailing '/' is missing.
      const auto nSeparators = std::count(pattern.begin(), pattern.end(), '/');
      const bool endsWithSeparator = pattern.back() == '/';
      fRootDirNesting = nSeparators + (endsWithSeparator ? 0 : 1);
   }

   // Position the iterator on the first key that should be returned, skipping all the keys that come before it.
   Advance();
}
396+
397+
/// Returns an iterator positioned on the first key to be returned (or equal to end() if there is none).
ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::begin() const
{
   // The RIterator takes ownership of the iterator created here.
   TIterator *topLevelKeys = fFile->GetListOfKeys()->MakeIterator();
   return RIterator(topLevelKeys, fPattern, fFlags);
}
401+
402+
/// Returns the past-the-end iterator.
ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::end() const
{
   // An iterator built from a null TIterator has an empty stack, which is what marks the end of the iteration.
   return RIterator(nullptr, fPattern, fFlags);
}
406+
407+
void ROOT::Experimental::RFileKeyIterable::RIterator::Advance()
408+
{
409+
fCurKey = nullptr;
410+
411+
const bool recursive = fFlags & kRecursive;
412+
413+
// We only want to return keys that refer to user objects, not internal ones, therefore we skip
414+
// all keys that have internal class names.
415+
while (!fIterStack.empty()) {
416+
auto &[iter, dirPath] = fIterStack.back();
417+
assert(iter);
418+
TObject *keyObj = iter->Next();
419+
if (!keyObj) {
420+
// reached end of the iteration
421+
fIterStack.pop_back();
422+
continue;
423+
}
424+
425+
assert(keyObj->IsA() == TClass::GetClass<TKey>());
426+
auto key = static_cast<TKey *>(keyObj);
427+
428+
const auto dirSep = (dirPath.empty() ? "" : "/");
429+
430+
if (strcmp(key->GetClassName(), "TDirectory") == 0 || strcmp(key->GetClassName(), "TDirectoryFile") == 0) {
431+
TDirectory *dir = key->ReadObject<TDirectory>();
432+
TIterator *innerIter = dir->GetListOfKeys()->MakeIterator();
433+
assert(innerIter);
434+
fIterStack.emplace_back(innerIter, dirPath + dirSep + dir->GetName());
435+
continue;
436+
}
437+
438+
// Reconstruct the full path of the key
439+
const auto &fullPath = dirPath + dirSep + key->GetName();
440+
const auto nesting = fIterStack.size() - 1;
441+
442+
// skip key if it's not a child of root dir
443+
if (!ROOT::StartsWith(fullPath, fPattern))
444+
continue;
445+
446+
// check that we are in the same directory as "rootDir".
447+
if (!recursive && nesting != fRootDirNesting)
448+
continue;
449+
450+
// All checks passed: return this key.
451+
assert(!fullPath.empty());
452+
fCurKey = key;
453+
break;
454+
}
455+
}
456+
457+
/// Returns the information about the key the iterator currently points to.
/// \throws ROOT::RException if the iterator does not point to a key (e.g. it is the end iterator).
ROOT::Experimental::RFileKeyInfo ROOT::Experimental::RFileKeyIterable::RIterator::operator*()
{
   // Both an exhausted iterator (empty stack) and an iterator without a current key are invalid.
   if (fIterStack.empty() || !fCurKey)
      throw ROOT::RException(R__FAIL("tried to dereference an invalid iterator"));

   // The current key always belongs to the innermost directory being traversed.
   const std::string &parentDir = fIterStack.back().fDirPath;

   RFileKeyInfo info;
   info.fName = parentDir;
   if (!parentDir.empty())
      info.fName += "/";
   info.fName += fCurKey->GetName();
   info.fTitle = fCurKey->GetTitle();
   info.fClassName = fCurKey->GetClassName();
   info.fCycle = fCurKey->GetCycle();
   return info;
}
475+
476+
void RFile::Print(std::ostream &out) const
477+
{
478+
std::vector<RFileKeyInfo> keys;
479+
auto keysIter = GetKeys();
480+
for (const auto &key : keysIter) {
481+
keys.emplace_back(key);
482+
}
483+
484+
std::sort(keys.begin(), keys.end(), [](const auto &a, const auto &b) { return a.fName < b.fName; });
485+
for (const auto &key : keys) {
486+
out << key.fClassName << " " << key.fName << ";" << key.fCycle << ": \"" << key.fTitle << "\"\n";
487+
}
488+
}
489+
376490
size_t RFile::Flush()
377491
{
378492
return fFile->Write();

0 commit comments

Comments
 (0)