Skip to content

Commit 8158b40

Browse files
committed
[df] Fix interaction between RSampleInfo and redirected EOS paths
When the input files are paths to FUSE-mounted EOS files, these paths are redirected to the corresponding xroot EOS URL by TFile::Open during the event loop. This caused a bad interaction with the sample metadata and its subsequent usage in the event loop, e.g. through DefinePerSample. Specifically, we fill a map with the metadata at construction time, and its keys are built from the input file paths (without redirection). The metadata was then irretrievable during the event loop, since the map key did not correspond to the redirected path. Fix this by also inserting a key built from the redirected path when filling the map in ChangeSpec.
1 parent deea0ea commit 8158b40

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

tree/dataframe/src/RLoopManager.cxx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,25 @@
4242
#include "ROOT/RNTupleDS.hxx"
4343
#endif
4444

45+
#ifdef R__UNIX
46+
// Headers and macros needed to perform EOS XRootD redirection in ChangeSpec
47+
#include <optional>
48+
#include "TEnv.h"
49+
#include "TSystem.h"
50+
#ifndef R__FBSD
51+
#include <sys/xattr.h>
52+
#else
53+
#include <sys/extattr.h>
54+
#endif
55+
#ifdef R__MACOSX
56+
/* On macOS getxattr takes two extra arguments that should be set to 0 */
57+
#define getxattr(path, name, value, size) getxattr(path, name, value, size, 0u, 0)
58+
#endif
59+
#ifdef R__FBSD
60+
#define getxattr(path, name, value, size) extattr_get_file(path, EXTATTR_NAMESPACE_USER, name, value, size)
61+
#endif
62+
#endif
63+
4564
#include <algorithm>
4665
#include <atomic>
4766
#include <cassert>
@@ -403,6 +422,38 @@ RLoopManager::RLoopManager(ROOT::RDF::Experimental::RDatasetSpec &&spec)
403422
ChangeSpec(std::move(spec));
404423
}
405424

425+
#ifdef R__UNIX
426+
namespace {
427+
std::optional<std::string> GetRedirectedSampleId(std::string_view path, std::string_view datasetName)
428+
{
429+
// Mimick the redirection done in TFile::Open to see if the path points to a FUSE-mounted EOS path.
430+
// If so, we create a redirected sample ID with the full xroot URL.
431+
TString expandedUrl(path.data());
432+
gSystem->ExpandPathName(expandedUrl);
433+
if (gEnv->GetValue("TFile.CrossProtocolRedirects", 1) == 1) {
434+
TUrl fileurl(expandedUrl, /* default is file */ kTRUE);
435+
if (strcmp(fileurl.GetProtocol(), "file") == 0) {
436+
ssize_t len = getxattr(fileurl.GetFile(), "eos.url.xroot", nullptr, 0);
437+
if (len > 0) {
438+
std::string xurl(len, 0);
439+
std::string fileNameFromUrl{fileurl.GetFile()};
440+
if (getxattr(fileNameFromUrl.c_str(), "eos.url.xroot", &xurl[0], len) == len) {
441+
// Sometimes the `getxattr` call may return an invalid URL due
442+
// to the POSIX attribute not being yet completely filled by EOS.
443+
if (auto baseName = fileNameFromUrl.substr(fileNameFromUrl.find_last_of("/") + 1);
444+
std::equal(baseName.crbegin(), baseName.crend(), xurl.crbegin())) {
445+
return xurl + '/' + datasetName.data();
446+
}
447+
}
448+
}
449+
}
450+
}
451+
452+
return std::nullopt;
453+
}
454+
} // namespace
455+
#endif
456+
406457
/**
407458
* @brief Changes the internal TTree held by the RLoopManager.
408459
*
@@ -441,6 +492,11 @@ void RLoopManager::ChangeSpec(ROOT::RDF::Experimental::RDatasetSpec &&spec)
441492
// is exposed to users via RSampleInfo and DefinePerSample).
442493
const auto sampleId = files[i] + '/' + trees[i];
443494
fSampleMap.insert({sampleId, &sample});
495+
#ifdef R__UNIX
496+
// Also add redirected EOS xroot URL when available
497+
if (auto redirectedSampleId = GetRedirectedSampleId(files[i], trees[i]))
498+
fSampleMap.insert({redirectedSampleId.value(), &sample});
499+
#endif
444500
}
445501
}
446502
SetTree(std::move(chain));

0 commit comments

Comments
 (0)