Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ add_dependencies(rootcint rootcint)
if (TARGET Gui)
ROOT_EXECUTABLE(rootbrowse src/rootbrowse.cxx LIBRARIES RIO Core Rint Gui)
endif()
ROOT_EXECUTABLE(rootcp src/rootcp.cxx LIBRARIES RIO Tree Core Rint)
ROOT_EXECUTABLE(rootls src/rootls.cxx LIBRARIES RIO Tree Core Rint ROOTNTuple)

ROOT_ADD_TEST_SUBDIRECTORY(test)
46 changes: 0 additions & 46 deletions main/python/cmdLineUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
# http://stackoverflow.com/questions/4675728/redirect-stdout-to-a-file-in-python/22434262#22434262
# Thanks J.F. Sebastian !!

from contextlib import contextmanager
import os
import sys
from time import sleep

Check failure on line 19 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

main/python/cmdLineUtils.py:19:18: F401 `time.sleep` imported but unused
from itertools import zip_longest

Check failure on line 20 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

main/python/cmdLineUtils.py:20:23: F401 `itertools.zip_longest` imported but unused

Check failure on line 20 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

main/python/cmdLineUtils.py:16:1: I001 Import block is un-sorted or un-formatted

def fileno(file_or_fd):
"""
Expand Down Expand Up @@ -81,10 +81,10 @@
ROOT.PyConfig.IgnoreCommandLineOptions = True
ROOT.gROOT.GetVersion()

import argparse

Check failure on line 84 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E402)

main/python/cmdLineUtils.py:84:1: E402 Module level import not at top of file
import glob

Check failure on line 85 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E402)

main/python/cmdLineUtils.py:85:1: E402 Module level import not at top of file
import fnmatch

Check failure on line 86 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E402)

main/python/cmdLineUtils.py:86:1: E402 Module level import not at top of file
import logging

Check failure on line 87 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E402)

main/python/cmdLineUtils.py:87:1: E402 Module level import not at top of file

Check failure on line 87 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

main/python/cmdLineUtils.py:84:1: I001 Import block is un-sorted or un-formatted

LOG_FORMAT = "%(levelname)s: %(message)s"
logging.basicConfig(format=LOG_FORMAT)
Expand Down Expand Up @@ -328,12 +328,12 @@
Open a ROOT file (like openROOTFile) with the possibility
to change compression settings
"""
if compress != None and os.path.isfile(fileName):

Check failure on line 331 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E711)

main/python/cmdLineUtils.py:331:20: E711 Comparison to `None` should be `cond is not None`
logging.warning("can't change compression settings on existing file")
return None
mode = "recreate" if recreate else "update"
theFile = openROOTFile(fileName, mode)
if compress != None:

Check failure on line 336 in main/python/cmdLineUtils.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E711)

main/python/cmdLineUtils.py:336:20: E711 Comparison to `None` should be `cond is not None`
theFile.SetCompressionSettings(compress)
return theFile

Expand Down Expand Up @@ -783,52 +783,6 @@
# End of help strings
##########

##########
# ROOTCP


def _copyObjects(fileName, pathSplitList, destFile, destPathSplit, oneFile, recursive, replace):
retcode = 0
destFileName = destFile.GetName()
rootFile = openROOTFile(fileName) if fileName != destFileName else destFile
if not rootFile:
return 1
ROOT.gROOT.GetListOfFiles().Remove(rootFile) # Fast copy necessity
for pathSplit in pathSplitList:
oneSource = oneFile and len(pathSplitList) == 1
retcode += copyRootObject(rootFile, pathSplit, destFile, destPathSplit, oneSource, recursive, replace)
if fileName != destFileName:
rootFile.Close()
return retcode


def rootCp(sourceList, destFileName, destPathSplit, compress=None, recreate=False, recursive=False, replace=False):
    """Copy objects from the given source files into ``destFileName``.

    ``sourceList`` holds ``(fileName, pathSplitList)`` pairs. Returns 1 when the
    arguments are invalid or the destination cannot be opened, otherwise the sum
    of the return codes of the per-file copies (0 meaning full success).
    """
    # Argument validation
    if sourceList == [] or destFileName == "":
        return 1
    destIsAlsoSource = any(destFileName == source[0] for source in sourceList)
    if recreate and destIsAlsoSource:
        logging.error("cannot recreate destination file if this is also a source file")
        return 1

    # Destination file
    destFile = openROOTFileCompress(destFileName, compress, recreate)
    if not destFile:
        return 1
    ROOT.gROOT.GetListOfFiles().Remove(destFile)  # Fast copy necessity

    # Copy from each source file in turn, accumulating the return codes
    singleFile = len(sourceList) == 1
    retcode = 0
    for fileName, pathSplitList in sourceList:
        retcode += _copyObjects(fileName, pathSplitList, destFile, destPathSplit, singleFile, recursive, replace)
    destFile.Close()
    return retcode


# End of ROOTCP
##########

##########
# ROOTEVENTSELECTOR

Expand Down
66 changes: 0 additions & 66 deletions main/python/rootcp.py

This file was deleted.

261 changes: 261 additions & 0 deletions main/src/RootObjTree.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
/// \file RootObjTree.cxx
///
/// \author Giacomo Parolini <giacomo.parolini@cern.ch>
/// \date 2025-10-14

#include "RootObjTree.hxx"

#include "wildcards.hpp"

#include <TFile.h>

#include <ROOT/StringUtils.hxx>

#include <algorithm>
#include <deque>
#include <iostream>
#include <set>

/// Tells whether `haystack` matches the glob `pattern`; the matching itself is delegated to wildcards::match().
static bool MatchesGlob(std::string_view haystack, std::string_view pattern)
{
   const bool isMatch = wildcards::match(haystack, pattern);
   return isMatch;
}

/// Opens `fileName` for reading and builds the tree of the objects whose path matches `pattern`.
///
/// `pattern` is split on '/' and the i-th component is glob-matched against the key names found at nesting level i.
/// An empty pattern applies no filtering.
///
/// \param fileName Name of the file to open (passed as-is to TFile::Open).
/// \param pattern  '/'-separated glob pattern selecting the objects inside the file.
/// \param flags    Bitmask of EGetMatchingPathsFlags: kRecursive also descends into the directories found below the
///                 deepest level of `pattern`; kIgnoreFailedMatches suppresses the "matches no objects" errors.
/// \return The source description. `fErrors` is non-empty if the file could not be opened or (unless
///         kIgnoreFailedMatches is set) if some pattern component other than "*" matched nothing.
ROOT::CmdLine::RootSource
ROOT::CmdLine::GetMatchingPathsInFile(std::string_view fileName, std::string_view pattern, std::uint32_t flags)
{
   ROOT::CmdLine::RootSource source;
   source.fFileName = fileName;
   auto &nodeTree = source.fObjectTree;
   nodeTree.fFile =
      std::unique_ptr<TFile>(TFile::Open(std::string(fileName).c_str(), "READ_WITHOUT_GLOBALREGISTRATION"));
   if (!nodeTree.fFile || nodeTree.fFile->IsZombie()) {
      source.fErrors.push_back("Failed to open file");
      return source;
   }

   const auto patternSplits = pattern.empty() ? std::vector<std::string>{} : ROOT::Split(pattern, "/");
   std::vector<bool> patternWasMatchedAtLeastOnce(patternSplits.size());

   // Match all objects at all nesting levels down to the deepest nesting level of `pattern` (or all nesting levels
   // if we have the "recursive listing" flag). The nodes are visited breadth-first.

   // Initialize the nodeTree with the root node and mark it as the first node to visit.
   {
      ROOT::CmdLine::RootObjNode rootNode = {};
      rootNode.fName = std::string(fileName);
      rootNode.fClassName = nodeTree.fFile->Class()->GetName();
      rootNode.fDir = nodeTree.fFile.get();
      nodeTree.fNodes.emplace_back(std::move(rootNode));
   }
   std::deque<NodeIdx_t> nodesToVisit{0};

   // Keep track of the object names found at every nesting level and only add the first one.
   std::unordered_set<std::string> namesFound;

   const bool isRecursive = flags & EGetMatchingPathsFlags::kRecursive;
   do {
      NodeIdx_t curIdx = nodesToVisit.front();
      nodesToVisit.pop_front();
      ROOT::CmdLine::RootObjNode *cur = &nodeTree.fNodes[curIdx];
      assert(cur->fDir);

      // Gather all keys under this directory and sort them by name, so that the objects are listed alphabetically.
      std::vector<TKey *> keys;
      keys.reserve(cur->fDir->GetListOfKeys()->GetEntries());
      for (TKey *key : ROOT::Detail::TRangeStaticCast<TKey>(cur->fDir->GetListOfKeys()))
         keys.push_back(key);

      // Keys sharing a name (several cycles of the same object) are ordered by decreasing cycle number: only the
      // first key of each name is kept below, and it must be the most recent one. Sorting by name alone would leave
      // the relative order of those keys unspecified.
      std::sort(keys.begin(), keys.end(), [](const auto *a, const auto *b) {
         const int nameOrder = strcmp(a->GetName(), b->GetName());
         if (nameOrder != 0)
            return nameOrder < 0;
         return a->GetCycle() > b->GetCycle();
      });

      namesFound.clear();

      // Iterate the keys and find matches
      for (TKey *key : keys) {
         // NOTE: cur->fNesting can only be >= patternSplits.size() if we have `isRecursive == true` (see the code near
         // the end of the outer do/while loop).
         // In that case we don't care about matching patterns anymore because we are already beyond the nesting level
         // where pattern filtering applies.
         // In all other cases, we check if the key name matches the pattern and skip it if it doesn't.
         if (cur->fNesting < patternSplits.size()) {
            if (MatchesGlob(key->GetName(), patternSplits[cur->fNesting]))
               patternWasMatchedAtLeastOnce[cur->fNesting] = true;
            else
               continue;
         }

         // Older cycles of an already registered name are skipped. This is only a warning (not an error); it mirrors
         // what the former Python implementation of rootcp reported.
         if (namesFound.count(key->GetName()) > 0) {
            std::cerr << "WARNING: Several versions of '" << key->GetName() << "' are present in '" << fileName
                      << "'. Only the most recent will be considered.\n";
            continue;
         }
         namesFound.insert(key->GetName());

         auto &newChild = nodeTree.fNodes.emplace_back(NodeFromKey(*key));
         // Need to get back cur since the emplace_back() may have moved it.
         cur = &nodeTree.fNodes[curIdx];
         newChild.fNesting = cur->fNesting + 1;
         newChild.fParent = curIdx;
         if (!cur->fNChildren)
            cur->fFirstChild = nodeTree.fNodes.size() - 1;
         cur->fNChildren++;

         const auto *cl = TClass::GetClass(key->GetClassName());
         if (cl && cl->InheritsFrom("TDirectory"))
            newChild.fDir = cur->fDir->GetDirectory(key->GetName());
      }

      // Only recurse into subdirectories that are up to the deepest level we ask for through `pattern` (except in
      // case of recursive listing).
      if (cur->fNesting < patternSplits.size() || isRecursive) {
         for (auto childIdx = cur->fFirstChild; childIdx < cur->fFirstChild + cur->fNChildren; ++childIdx) {
            auto &child = nodeTree.fNodes[childIdx];
            if (child.fDir)
               nodesToVisit.push_back(childIdx);
            else if (cur->fNesting < patternSplits.size())
               nodeTree.fLeafList.push_back(childIdx);
         }
      }
      // A visited node sitting exactly at the deepest pattern level is itself a match.
      if (cur->fNesting == patternSplits.size()) {
         if (cur->fDir)
            nodeTree.fDirList.push_back(curIdx);
         else
            nodeTree.fLeafList.push_back(curIdx);
      }
   } while (!nodesToVisit.empty());

   if (!(flags & kIgnoreFailedMatches)) {
      for (auto i = 0u; i < patternSplits.size(); ++i) {
         // We don't append errors for '*' because its semantics imply "0 or more matches", so 0 matches is a valid
         // case. For any other pattern we expect at least 1 match.
         if (!patternWasMatchedAtLeastOnce[i] && !patternSplits[i].empty() && patternSplits[i] != "*") {
            std::string err = "'" + std::string(fileName) + ":" +
                              ROOT::Join("/", std::span<const std::string>{patternSplits.data(), i + 1}) +
                              "' matches no objects.";
            source.fErrors.push_back(err);
         }
      }
   }

   return source;
}

/// Splits a raw source string of the form `fileName[:pattern]` into its file name and pattern parts.
///
/// The file name may be a URL using one of the known protocols (http, https, root, gs, s3). For URLs the ':'
/// separating file name and pattern is searched only after the authority part (i.e. from the first '/' following
/// "://"), so that a port number or a `user:password@` prefix is not mistaken for the separator. If the URL has no
/// '/' after the authority, the first ':' following "://" is used.
///
/// \param sourceRaw The raw source string. The returned views point into it.
/// \return The pair {fileName, pattern} (pattern is empty if there is no separator), or an error if `sourceRaw`
///         contains "://" preceded by an unknown protocol.
ROOT::RResult<std::pair<std::string_view, std::string_view>>
ROOT::CmdLine::SplitIntoFileNameAndPattern(std::string_view sourceRaw)
{
   auto prefixIdx = sourceRaw.find("://");
   std::string_view::size_type separatorIdx = 0;
   if (prefixIdx != std::string_view::npos) {
      bool prefixFound = false;
      // Handle known URI prefixes
      static const char *const specialPrefixes[] = {"http", "https", "root", "gs", "s3"};
      auto prefix = sourceRaw.substr(0, prefixIdx);
      for (std::string_view knownPrefix : specialPrefixes) {
         if (prefix == knownPrefix) {
            prefixFound = true;
            break;
         }
      }
      if (!prefixFound) {
         return R__FAIL("unknown file protocol");
      }
      // Skip the authority ("host", "host:port", "user:pass@host", ...): any ':' it contains is part of the URL and
      // not the file/pattern separator.
      const auto authorityStart = prefixIdx + 3;
      const auto pathStart = sourceRaw.find('/', authorityStart);
      const auto searchStart = (pathStart != std::string_view::npos) ? pathStart : authorityStart;
      separatorIdx = sourceRaw.find(':', searchStart);
   } else {
      separatorIdx = sourceRaw.find_first_of(':');
   }

   if (separatorIdx != std::string_view::npos) {
      return {{sourceRaw.substr(0, separatorIdx), sourceRaw.substr(separatorIdx + 1)}};
   }
   return {{sourceRaw, std::string_view{}}};
}

/// Parses a raw source string of the form `fileName[:pattern]` and collects the matching objects of that file.
///
/// \param sourceRaw The raw source string, split via SplitIntoFileNameAndPattern().
/// \param flags     Forwarded to GetMatchingPathsInFile().
/// \return The resulting source; if `sourceRaw` cannot be split, a source containing only the error report.
ROOT::CmdLine::RootSource ROOT::CmdLine::ParseRootSource(std::string_view sourceRaw, std::uint32_t flags)
{
   ROOT::CmdLine::RootSource source;

   auto res = SplitIntoFileNameAndPattern(sourceRaw);
   if (!res) {
      source.fErrors.push_back(res.GetError()->GetReport());
      return source;
   }

   auto [fileName, tokens] = res.Unwrap();
   source = ROOT::CmdLine::GetMatchingPathsInFile(fileName, tokens, flags);

   // A source without a file must carry at least one error explaining why. The converse does not hold: a file that
   // was opened successfully can still come with errors (e.g. a pattern that matches no objects), so errors and an
   // open file are not mutually exclusive.
   assert(source.fObjectTree.fFile || !source.fErrors.empty());
   return source;
}

std::vector<ROOT::CmdLine::RootSource>
ROOT::CmdLine::ParseRootSources(const std::vector<std::string> &sourcesRaw, std::uint32_t flags)
{
std::vector<ROOT::CmdLine::RootSource> sources;
sources.reserve(sourcesRaw.size());

for (const auto &srcRaw : sourcesRaw) {
sources.push_back(ParseRootSource(srcRaw, flags));
}

return sources;
}

void ROOT::CmdLine::PrintObjTree(const RootObjTree &tree, std::ostream &out)
{
if (tree.fNodes.empty())
return;

struct RevNode {
std::set<NodeIdx_t> fChildren;
};
std::vector<RevNode> revNodes;
revNodes.resize(tree.fNodes.size());

// Un-linearize the tree
for (int i = (int)tree.fNodes.size() - 1; i >= 0; --i) {
const auto *node = &tree.fNodes[i];
NodeIdx_t childIdx = i;
NodeIdx_t parentIdx = node->fParent;
while (childIdx != parentIdx) {
auto &revNodeParent = revNodes[parentIdx];
revNodeParent.fChildren.insert(childIdx);
node = &tree.fNodes[parentIdx];
childIdx = parentIdx;
parentIdx = node->fParent;
}
}

// Print out the tree.
// Vector of {nesting, nodeIdx}
std::vector<std::pair<std::uint32_t, NodeIdx_t>> nodesToVisit = {{0, 0}};
while (!nodesToVisit.empty()) {
const auto [nesting, nodeIdx] = nodesToVisit.back();
nodesToVisit.pop_back();
const auto &cur = revNodes[nodeIdx];
const auto &node = tree.fNodes[nodeIdx];
for (auto i = 0u; i < 2 * nesting; ++i)
out << ' ';
out << node.fName << " : " << node.fClassName << "\n";
// Add the children in reverse order to preserve alphabetical order during depth-first visit.
for (auto it = cur.fChildren.rbegin(); it != cur.fChildren.rend(); ++it) {
nodesToVisit.push_back({nesting + 1, *it});
}
}
}

/// Builds the '/'-separated path of node `nodeIdx` by prepending the names of its ancestors, stopping before
/// node 0. With ENodeFullPathOpt::kIncludeFilename, and for any node other than node 0, the name of node 0
/// followed by ':' is prepended as well.
std::string ROOT::CmdLine::NodeFullPath(const ROOT::CmdLine::RootObjTree &tree, ROOT::CmdLine::NodeIdx_t nodeIdx,
                                        ROOT::CmdLine::ENodeFullPathOpt opt)
{
   const auto &nodes = tree.fNodes;

   std::string path = nodes[nodeIdx].fName;
   // Climb towards the root, prepending one ancestor name per step.
   for (auto ancestorIdx = nodes[nodeIdx].fParent; ancestorIdx != 0; ancestorIdx = nodes[ancestorIdx].fParent) {
      const std::string &ancestorName = nodes[ancestorIdx].fName;
      // No separator is added in front of an empty path.
      if (path.empty())
         path = ancestorName;
      else
         path = ancestorName + "/" + path;
   }

   const bool withFileName = (opt == ENodeFullPathOpt::kIncludeFilename) && (nodeIdx > 0);
   if (withFileName)
      path = nodes[0].fName + ":" + path;

   return path;
}
Loading
Loading