Skip to content

Commit 130fc21

Browse files
committed
[ntuple] Add de/serialization of RNTupleAttributes
1 parent 1619f46 commit 130fc21

File tree

6 files changed

+251
-20
lines changed

6 files changed

+251
-20
lines changed

tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,28 @@ struct RNTupleClusterBoundaries {
6767
std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
6868
} // namespace Internal
6969

70+
namespace Experimental::Internal {
71+
72+
struct RNTupleAttrSetDescriptor {
73+
std::uint64_t fAnchorUncompLen = 0;
74+
std::uint16_t fSchemaVersion = 0;
75+
// The locator of the AttributeSet anchor.
76+
// In case of kTypeFile, it points to the beginning of the Anchor's payload.
77+
// NOTE: Only kTypeFile is supported at the moment.
78+
RNTupleLocator fLocator;
79+
std::string fName;
80+
81+
bool operator==(const RNTupleAttrSetDescriptor &other) const
82+
{
83+
return fAnchorUncompLen == other.fAnchorUncompLen && fSchemaVersion == other.fSchemaVersion &&
84+
fLocator == other.fLocator && fName == other.fName;
85+
};
86+
bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
87+
};
88+
89+
const std::vector<RNTupleAttrSetDescriptor> &GetAttributeSets(const RNTupleDescriptor &desc);
90+
} // namespace Experimental::Internal
91+
7092
// clang-format off
7193
/**
7294
\class ROOT::RFieldDescriptor
@@ -645,6 +667,8 @@ and backward compatibility when the metadata evolves.
645667
class RNTupleDescriptor final {
646668
friend class Internal::RNTupleDescriptorBuilder;
647669
friend RNTupleDescriptor Internal::CloneDescriptorSchema(const RNTupleDescriptor &desc);
670+
friend const std::vector<Experimental::Internal::RNTupleAttrSetDescriptor> &
671+
ROOT::Experimental::Internal::GetAttributeSets(const RNTupleDescriptor &desc);
648672

649673
public:
650674
class RHeaderExtension;
@@ -697,6 +721,8 @@ private:
697721
std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
698722
/// Potentially a subset of all the available clusters
699723
std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
724+
/// List of AttributeSets linked to this RNTuple
725+
std::vector<Experimental::Internal::RNTupleAttrSetDescriptor> fAttributeSets;
700726

701727
// We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
702728
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const;
@@ -841,6 +867,9 @@ public:
841867
bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
842868
std::vector<std::uint64_t> GetFeatureFlags() const;
843869

870+
/// Returns the names of all Attribute Sets associated with this RNTuple.
871+
std::vector<std::string> GetAttributeSetNames() const;
872+
844873
/// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
845874
const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
846875

@@ -1597,6 +1626,8 @@ public:
15971626
RResult<void> AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
15981627
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
15991628

1629+
RResult<void> AddAttributeSet(Experimental::Internal::RNTupleAttrSetDescriptor &&attrSetDesc);
1630+
16001631
/// Mark the beginning of the header extension; any fields and columns added after a call to this function are
16011632
/// annotated as being part of the header extension.
16021633
void BeginHeaderExtension();
@@ -1630,6 +1661,13 @@ inline RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
16301661
}
16311662

16321663
} // namespace Internal
1664+
1665+
inline const std::vector<Experimental::Internal::RNTupleAttrSetDescriptor> &
1666+
Experimental::Internal::GetAttributeSets(const RNTupleDescriptor &desc)
1667+
{
1668+
return desc.fAttributeSets;
1669+
}
1670+
16331671
} // namespace ROOT
16341672

16351673
#endif // ROOT_RNTupleDescriptor

tree/ntuple/inc/ROOT/RNTupleSerialize.hxx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ class RNTupleDescriptor;
3636
class RClusterDescriptor;
3737
enum class EExtraTypeInfoIds;
3838

39+
namespace Experimental::Internal {
40+
struct RNTupleAttrSetDescriptor;
41+
}
42+
3943
namespace Internal {
4044

4145
class RClusterDescriptorBuilder;
@@ -271,6 +275,11 @@ public:
271275
static RResult<std::uint32_t> DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize,
272276
ROOT::Internal::RNTupleDescriptorBuilder &descBuilder);
273277

278+
static RResult<std::uint32_t>
279+
SerializeAttributeSet(const Experimental::Internal::RNTupleAttrSetDescriptor &attrSetDesc, void *buffer);
280+
static RResult<std::uint32_t> DeserializeAttributeSet(const void *buffer, std::uint64_t bufSize,
281+
Experimental::Internal::RNTupleAttrSetDescriptor &attrSetDesc);
282+
274283
static RResult<RContext> SerializeHeader(void *buffer, const RNTupleDescriptor &desc);
275284
static RResult<std::uint32_t> SerializePageList(void *buffer, const RNTupleDescriptor &desc,
276285
std::span<ROOT::DescriptorId_t> physClusterIDs,

tree/ntuple/src/RNTupleDescriptor.cxx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,7 @@ ROOT::RNTupleDescriptor ROOT::RNTupleDescriptor::Clone() const
785785
clone.fSortedClusterGroupIds = fSortedClusterGroupIds;
786786
for (const auto &d : fClusterDescriptors)
787787
clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
788+
clone.fAttributeSets = fAttributeSets;
788789
return clone;
789790
}
790791

@@ -1359,6 +1360,18 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ReplaceExtraTypeInfo(RExtraTypeIn
13591360
fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
13601361
}
13611362

1363+
/// Registers an Attribute Set with the descriptor under construction.
/// Attribute Set names must be unique: if a set with the same name was already added, the descriptor is left
/// unchanged and an error is returned. On success the descriptor takes over the contents of `attrSetDesc`.
ROOT::RResult<void> ROOT::Internal::RNTupleDescriptorBuilder::AddAttributeSet(
   Experimental::Internal::RNTupleAttrSetDescriptor &&attrSetDesc)
{
   auto &attrSets = fDescriptor.fAttributeSets;
   if (std::find_if(attrSets.begin(), attrSets.end(),
                    [&name = attrSetDesc.fName](const auto &desc) { return desc.fName == name; }) != attrSets.end()) {
      return R__FAIL("attribute sets with duplicate names");
   }
   // A named rvalue reference is an lvalue: without std::move the descriptor (and its name) would be copied.
   attrSets.push_back(std::move(attrSetDesc));
   return RResult<void>::Success();
}
1374+
13621375
RNTupleSerializer::StreamerInfoMap_t ROOT::Internal::RNTupleDescriptorBuilder::BuildStreamerInfos() const
13631376
{
13641377
RNTupleSerializer::StreamerInfoMap_t streamerInfoMap;
@@ -1474,3 +1487,12 @@ ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable ROOT::RNTupleDescripto
14741487
{
14751488
return RExtraTypeInfoDescriptorIterable(*this);
14761489
}
1490+
1491+
std::vector<std::string> ROOT::RNTupleDescriptor::GetAttributeSetNames() const
1492+
{
1493+
std::vector<std::string> names;
1494+
names.reserve(fAttributeSets.size());
1495+
for (const auto &desc : fAttributeSets)
1496+
names.push_back(desc.fName);
1497+
return names;
1498+
}

tree/ntuple/src/RNTupleSerialize.cxx

Lines changed: 126 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,6 +1799,24 @@ ROOT::RResult<std::uint32_t> ROOT::Internal::RNTupleSerializer::SerializeFooter(
17991799
return R__FORWARD_ERROR(res);
18001800
}
18011801

1802+
// Attributes
1803+
frame = pos;
1804+
const auto &attrSets = ROOT::Experimental::Internal::GetAttributeSets(desc);
1805+
const auto nAttributeSets = attrSets.size();
1806+
pos += SerializeListFramePreamble(nAttributeSets, *where);
1807+
for (const auto &attrSet : attrSets) {
1808+
if (auto res = SerializeAttributeSet(attrSet, *where)) {
1809+
pos += res.Unwrap();
1810+
} else {
1811+
return R__FORWARD_ERROR(res);
1812+
}
1813+
}
1814+
if (auto res = SerializeFramePostscript(buffer ? frame : nullptr, pos - frame)) {
1815+
pos += res.Unwrap();
1816+
} else {
1817+
return R__FORWARD_ERROR(res);
1818+
}
1819+
18021820
std::uint32_t size = pos - base;
18031821
if (auto res = SerializeEnvelopePostscript(base, size)) {
18041822
size += res.Unwrap();
@@ -1808,6 +1826,35 @@ ROOT::RResult<std::uint32_t> ROOT::Internal::RNTupleSerializer::SerializeFooter(
18081826
return size;
18091827
}
18101828

1829+
/// Serializes one Attribute Set descriptor as a record frame that contains, in this order:
///   1. a 64-bit word packing the anchor's uncompressed length (shifted left by 16) with the 16-bit schema version,
///   2. the locator of the anchor,
///   3. the name of the Attribute Set.
/// When `buffer` is null, every serialization helper is handed a null destination; the function still returns
/// the number of bytes of the frame, which is how callers size the buffer for a second, real pass.
/// \return The size in bytes of the frame, or the forwarded error of the failing helper.
ROOT::RResult<std::uint32_t> ROOT::Internal::RNTupleSerializer::SerializeAttributeSet(
   const Experimental::Internal::RNTupleAttrSetDescriptor &attrDesc, void *buffer)
{
   // The length shares a 64-bit word with the 16-bit schema version, which leaves it 48 bits.
   R__ASSERT(attrDesc.fAnchorUncompLen < (1ull << 48));

   auto start = reinterpret_cast<unsigned char *>(buffer);
   auto cursor = start;
   // `where` aliases `cursor` when writing for real, so the helpers always see the current write position;
   // in the null-buffer case it aliases `buffer`, i.e. the helpers receive nullptr.
   void **where = buffer ? reinterpret_cast<void **>(&cursor) : &buffer;

   const auto frameStart = cursor;
   cursor += RNTupleSerializer::SerializeRecordFramePreamble(*where);

   const std::uint64_t packedLenAndVersion = (attrDesc.fAnchorUncompLen << 16) | attrDesc.fSchemaVersion;
   cursor += SerializeUInt64(packedLenAndVersion, *where);

   auto locatorRes = SerializeLocator(attrDesc.fLocator, *where);
   if (!locatorRes)
      return R__FORWARD_ERROR(locatorRes);
   cursor += locatorRes.Unwrap();

   cursor += SerializeString(attrDesc.fName, *where);

   const auto nbytes = cursor - frameStart;
   auto postscriptRes = SerializeFramePostscript(buffer ? frameStart : nullptr, nbytes);
   if (!postscriptRes)
      return R__FORWARD_ERROR(postscriptRes);
   return nbytes;
}
1857+
18111858
ROOT::RResult<void> ROOT::Internal::RNTupleSerializer::DeserializeHeader(const void *buffer, std::uint64_t bufSize,
18121859
RNTupleDescriptorBuilder &descBuilder)
18131860
{
@@ -1918,36 +1965,95 @@ ROOT::RResult<void> ROOT::Internal::RNTupleSerializer::DeserializeFooter(const v
19181965
}
19191966
bytes = frame + frameSize;
19201967

1921-
std::uint32_t nClusterGroups;
1922-
frame = bytes;
1923-
if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups)) {
1924-
bytes += res.Unwrap();
1925-
} else {
1926-
return R__FORWARD_ERROR(res);
1927-
}
1928-
for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) {
1929-
RClusterGroup clusterGroup;
1930-
if (auto res = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup)) {
1968+
{
1969+
std::uint32_t nClusterGroups;
1970+
frame = bytes;
1971+
if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups)) {
19311972
bytes += res.Unwrap();
19321973
} else {
19331974
return R__FORWARD_ERROR(res);
19341975
}
1976+
for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) {
1977+
RClusterGroup clusterGroup;
1978+
if (auto res = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup)) {
1979+
bytes += res.Unwrap();
1980+
} else {
1981+
return R__FORWARD_ERROR(res);
1982+
}
19351983

1936-
descBuilder.AddToOnDiskFooterSize(clusterGroup.fPageListEnvelopeLink.fLocator.GetNBytesOnStorage());
1937-
RClusterGroupDescriptorBuilder clusterGroupBuilder;
1938-
clusterGroupBuilder.ClusterGroupId(groupId)
1939-
.PageListLocator(clusterGroup.fPageListEnvelopeLink.fLocator)
1940-
.PageListLength(clusterGroup.fPageListEnvelopeLink.fLength)
1941-
.MinEntry(clusterGroup.fMinEntry)
1942-
.EntrySpan(clusterGroup.fEntrySpan)
1943-
.NClusters(clusterGroup.fNClusters);
1944-
descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap());
1984+
descBuilder.AddToOnDiskFooterSize(clusterGroup.fPageListEnvelopeLink.fLocator.GetNBytesOnStorage());
1985+
RClusterGroupDescriptorBuilder clusterGroupBuilder;
1986+
clusterGroupBuilder.ClusterGroupId(groupId)
1987+
.PageListLocator(clusterGroup.fPageListEnvelopeLink.fLocator)
1988+
.PageListLength(clusterGroup.fPageListEnvelopeLink.fLength)
1989+
.MinEntry(clusterGroup.fMinEntry)
1990+
.EntrySpan(clusterGroup.fEntrySpan)
1991+
.NClusters(clusterGroup.fNClusters);
1992+
descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap());
1993+
}
1994+
bytes = frame + frameSize;
1995+
}
1996+
1997+
// NOTE: Attributes were introduced in v1.0.1.0, so this section may be missing.
1998+
// Testing for > 8 because bufSize includes the checksum.
1999+
if (fnBufSizeLeft() > 8) {
2000+
std::uint32_t nAttributeSets;
2001+
frame = bytes;
2002+
if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAttributeSets)) {
2003+
bytes += res.Unwrap();
2004+
} else {
2005+
return R__FORWARD_ERROR(res);
2006+
}
2007+
for (std::uint32_t attrSetId = 0; attrSetId < nAttributeSets; ++attrSetId) {
2008+
Experimental::Internal::RNTupleAttrSetDescriptor attrSetDesc;
2009+
if (auto res = DeserializeAttributeSet(bytes, fnBufSizeLeft(), attrSetDesc)) {
2010+
descBuilder.AddAttributeSet(std::move(attrSetDesc));
2011+
bytes += res.Unwrap();
2012+
} else {
2013+
return R__FORWARD_ERROR(res);
2014+
}
2015+
}
2016+
bytes = frame + frameSize;
19452017
}
1946-
bytes = frame + frameSize;
19472018

19482019
return RResult<void>::Success();
19492020
}
19502021

2022+
/// Reads one Attribute Set record frame, as written by SerializeAttributeSet, from `buffer`.
/// The frame holds a 64-bit word packing the anchor's uncompressed length (upper 48 bits) with the schema version
/// (lower 16 bits), followed by the anchor locator and the Attribute Set name.
/// \param buffer Start of the record frame.
/// \param bufSize Number of readable bytes starting at `buffer`.
/// \param attrSetDesc Receives the parsed descriptor; it is only modified if the whole frame was read successfully.
/// \return The frame size as recorded in the frame header, or an error if the frame is malformed or truncated.
ROOT::RResult<std::uint32_t> ROOT::Internal::RNTupleSerializer::DeserializeAttributeSet(
   const void *buffer, std::uint64_t bufSize, Experimental::Internal::RNTupleAttrSetDescriptor &attrSetDesc)
{
   auto base = reinterpret_cast<const unsigned char *>(buffer);
   auto bytes = base;
   auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };

   std::uint64_t frameSize;
   if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize)) {
      bytes += res.Unwrap();
   } else {
      return R__FORWARD_ERROR(res);
   }

   // DeserializeUInt64 does not know the buffer size, so make sure the 8 bytes are really there.
   if (fnBufSizeLeft() < sizeof(std::uint64_t))
      return R__FAIL("attribute set frame too short");
   std::uint64_t uncompLenAndSchemaVersion;
   bytes += DeserializeUInt64(bytes, uncompLenAndSchemaVersion);
   const std::uint64_t uncompLen = uncompLenAndSchemaVersion >> 16;
   // The schema version occupies the full low 16 bits of the packed word (mirror of the `<< 16` on write).
   const auto schemaVersion = static_cast<std::uint16_t>(uncompLenAndSchemaVersion & 0xFFFF);

   RNTupleLocator attrSetLocator;
   if (auto res = DeserializeLocator(bytes, fnBufSizeLeft(), attrSetLocator)) {
      bytes += res.Unwrap();
   } else {
      return R__FORWARD_ERROR(res);
   }

   std::string attrSetName;
   if (auto res = DeserializeString(bytes, fnBufSizeLeft(), attrSetName)) {
      bytes += res.Unwrap();
   } else {
      return R__FORWARD_ERROR(res);
   }

   attrSetDesc = {uncompLen, schemaVersion, std::move(attrSetLocator), std::move(attrSetName)};

   return frameSize;
}
2056+
19512057
ROOT::RResult<std::vector<ROOT::Internal::RClusterDescriptorBuilder>>
19522058
ROOT::Internal::RNTupleSerializer::DeserializePageListRaw(const void *buffer, std::uint64_t bufSize,
19532059
ROOT::DescriptorId_t clusterGroupId,

tree/ntuple/test/ntuple_descriptor.cxx

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,40 @@ TEST(RNTupleDescriptor, GetTypeNameForComparison)
372372
}
373373
}
374374

375+
// Checks that Attribute Sets added through the descriptor builder are kept, in insertion order, by the
// resulting descriptor and are visible both through GetAttributeSetNames() and the internal accessor.
TEST(RNTupleDescriptor, AttributeSets)
{
   RNTupleLocator locator;
   locator.SetType(ROOT::RNTupleLocator::kTypeFile);
   locator.SetPosition(128ul);
   auto attrSetDesc = ROOT::Experimental::Internal::RNTupleAttrSetDescriptor{1024, // uncomp len
                                                                             1,    // schema version
                                                                             locator, "AttrSetName"};
   RNTupleDescriptorBuilder descBuilder;
   descBuilder.SetVersion(1, 0, 1, 0);
   descBuilder.SetNTuple("ntpl", "");
   // AddAttributeSet reports duplicate names through its result; do not silently drop it.
   ASSERT_TRUE(bool(descBuilder.AddAttributeSet(std::move(attrSetDesc))));

   locator.SetPosition(555ul);
   // Re-assigning a moved-from descriptor is fine: all members are overwritten.
   attrSetDesc = ROOT::Experimental::Internal::RNTupleAttrSetDescriptor{200, // uncomp len
                                                                        2,   // schema version
                                                                        locator, "AttrSetName 2"};
   ASSERT_TRUE(bool(descBuilder.AddAttributeSet(std::move(attrSetDesc))));

   auto desc = descBuilder.MoveDescriptor();
   auto attrSetNames = desc.GetAttributeSetNames();
   ASSERT_EQ(attrSetNames.size(), 2u);
   EXPECT_EQ(attrSetNames[0], "AttrSetName");
   EXPECT_EQ(attrSetNames[1], "AttrSetName 2");

   // Bind by reference: the accessor returns a reference into `desc`, there is no need to copy the vector.
   const auto &attrSets = ROOT::Experimental::Internal::GetAttributeSets(desc);
   ASSERT_EQ(attrSets.size(), 2u);
   EXPECT_EQ(attrSets[0].fAnchorUncompLen, 1024);
   EXPECT_EQ(attrSets[0].fSchemaVersion, 1);
   EXPECT_EQ(attrSets[0].fLocator.GetPosition<std::uint64_t>(), 128);
   EXPECT_EQ(attrSets[1].fAnchorUncompLen, 200);
   EXPECT_EQ(attrSets[1].fSchemaVersion, 2);
   EXPECT_EQ(attrSets[1].fLocator.GetPosition<std::uint64_t>(), 555);
}
408+
375409
TEST(RFieldDescriptorIterable, IterateOverFieldNames)
376410
{
377411
auto model = RNTupleModel::Create();

tree/ntuple/test/ntuple_serialize.cxx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2059,3 +2059,25 @@ TEST(RNTuple, SerializeMultiColumnRepresentationDeferredInMainHeader)
20592059
EXPECT_EQ(expect2_0, columnRange2_0);
20602060
EXPECT_EQ(expect2_1, columnRange2_1);
20612061
}
2062+
2063+
// Round-trips a single Attribute Set descriptor through SerializeAttributeSet / DeserializeAttributeSet.
TEST(RNTuple, SerializeAttrSets)
{
   RNTupleLocator locator;
   locator.SetType(ROOT::RNTupleLocator::kTypeFile);
   locator.SetPosition(128ul);
   auto attrSetDesc = ROOT::Experimental::Internal::RNTupleAttrSetDescriptor{1024, // uncomp len
                                                                             1,    // schema version
                                                                             locator, "AttrSetName"};

   // First pass with a null buffer only computes the required size.
   auto sizeRes = RNTupleSerializer::SerializeAttributeSet(attrSetDesc, nullptr);
   ASSERT_TRUE(bool(sizeRes));
   const auto nbytes = sizeRes.Unwrap();

   auto buf = MakeUninitArray<std::byte>(nbytes);
   ASSERT_TRUE(bool(buf));
   // Second pass actually writes; check the result of the call, not just the buffer pointer.
   auto writeRes = RNTupleSerializer::SerializeAttributeSet(attrSetDesc, buf.get());
   ASSERT_TRUE(bool(writeRes));
   EXPECT_EQ(writeRes.Unwrap(), nbytes);

   ROOT::Experimental::Internal::RNTupleAttrSetDescriptor deserializedDesc;
   auto readRes = RNTupleSerializer::DeserializeAttributeSet(buf.get(), nbytes, deserializedDesc);
   ASSERT_TRUE(bool(readRes));

   ASSERT_EQ(attrSetDesc, deserializedDesc);
}

0 commit comments

Comments
 (0)