2929#include < sstream>
3030#include " boost/algorithm/string.hpp"
3131
32- namespace {
33- edm::rntuple_temp::RNTupleTempOutputModule::Optimizations fromConfig (edm::ParameterSet const & iConfig) {
34- edm::rntuple_temp::RNTupleTempOutputModule::Optimizations opts ;
35- opts. approxZippedClusterSize = iConfig. getUntrackedParameter < unsigned long long >( " approxZippedClusterSize " );
36- opts. maxUnzippedClusterSize = iConfig. getUntrackedParameter < unsigned long long >( " maxUnzippedClusterSize " );
37- opts. initialUnzippedPageSize = iConfig. getUntrackedParameter < unsigned long long >( " initialUnzippedPageSize " );
38- opts. maxUnzippedPageSize = iConfig. getUntrackedParameter < unsigned long long >( " maxUnzippedPageSize " );
39- opts. pageBufferBudget = iConfig. getUntrackedParameter < unsigned long long >( " pageBufferBudget " );
40- opts. useBufferedWrite = iConfig. getUntrackedParameter < bool >( " useBufferedWrite " );
41- opts. useDirectIO = iConfig. getUntrackedParameter < bool >( " useDirectIO " );
42- return opts ;
32+ namespace edm ::rntuple_temp {
// Tests a name against this entry's stored pattern.
// Returns the result of std::regex_match on iBranchName against the member regex
// branch_, i.e. true only when the whole of iBranchName matches the expression.
33+ inline bool RNTupleTempOutputModule::SetStreamerForDataProduct::match (std::string const & iBranchName) const {
34+ return std::regex_match (iBranchName, branch_) ;
35+ }
36+
// Turns a glob-style expression into a std::regex.
// Works on a copy of iGlobBranchExpression: the glob wildcard '*' is rewritten to the
// regex ".*" and '?' to ".", then the resulting text is compiled with std::regex.
// Only these two substitutions are made; any other regex metacharacter already present
// in the input (for example a literal '.') is passed to std::regex unescaped.
// NOTE(review): the string literals below are shown with padding spaces inside the
// quotes in this view -- presumably a rendering artifact; confirm against the real file.
37+ std::regex RNTupleTempOutputModule::SetStreamerForDataProduct::convert (
38+ std::string const & iGlobBranchExpression) const {
39+ std::string tmp (iGlobBranchExpression );
40+ boost::replace_all (tmp, " * " , " .* " );
41+ boost::replace_all (tmp, " ? " , " . " );
42+ return std::regex (tmp) ;
4343 }
44- } // namespace
44+
45+ namespace {
// Builds the list of per-product streamer overrides from configuration.
// For every ParameterSet in iConfig, one SetStreamerForDataProduct is constructed in
// place from two untracked parameters: a std::string (the product name expression) and
// a bool (the useStreamer flag). The result has one entry per input ParameterSet, in
// the same order as iConfig.
46+ std::vector<RNTupleTempOutputModule::SetStreamerForDataProduct> fromConfig (
47+ std::vector<edm::ParameterSet> const & iConfig) {
48+ std::vector<RNTupleTempOutputModule::SetStreamerForDataProduct> returnValue;
// The final size is known up front, so allocate once.
49+ returnValue.reserve (iConfig.size ());
50+
51+ for (auto const & prod : iConfig) {
// Arguments are forwarded to the SetStreamerForDataProduct constructor (declared
// outside this view).
52+ returnValue.emplace_back (prod.getUntrackedParameter <std::string>(" product" ),
53+ prod.getUntrackedParameter <bool >(" useStreamer" ));
54+ }
55+ return returnValue;
56+ }
57+
// Looks up an explicit streamer setting for a name.
// iName with its last character removed is tested against each entry of iSpecial in
// order; the useStreamer_ flag of the first entry whose match() succeeds is returned.
// An empty optional is returned when no entry matches.
// NOTE(review): the returned std::optional<bool> converts to bool via has_value(), not
// via the contained flag -- callers that want the flag should use value()/value_or().
58+ std::optional<bool > useStreamer (std::string const & iName,
59+ std::vector<RNTupleTempOutputModule::SetStreamerForDataProduct> const & iSpecial) {
// Drops the final character unconditionally; presumably this is the trailing '.' of a
// branch name (going by the variable name) -- TODO confirm against callers.
60+ auto nameNoDot = iName.substr (0 , iName.size () - 1 );
61+ for (auto const & prod : iSpecial) {
62+ if (prod.match (nameNoDot)) {
// First match wins; later entries are not consulted.
63+ return prod.useStreamer_ ;
64+ }
65+ }
66+ return {};
67+ }
68+
// Reads the RNTuple write options from a ParameterSet.
// Each field of the returned Optimizations object is filled from the untracked
// parameter of the same name in iConfig: five unsigned long long values
// (approxZippedClusterSize, maxUnzippedClusterSize, initialUnzippedPageSize,
// maxUnzippedPageSize, pageBufferBudget) and two bools (useBufferedWrite, useDirectIO).
69+ edm::rntuple_temp::RNTupleTempOutputModule::Optimizations fromConfig (edm::ParameterSet const & iConfig) {
70+ edm::rntuple_temp::RNTupleTempOutputModule::Optimizations opts;
71+ opts.approxZippedClusterSize = iConfig.getUntrackedParameter <unsigned long long >(" approxZippedClusterSize" );
72+ opts.maxUnzippedClusterSize = iConfig.getUntrackedParameter <unsigned long long >(" maxUnzippedClusterSize" );
73+ opts.initialUnzippedPageSize = iConfig.getUntrackedParameter <unsigned long long >(" initialUnzippedPageSize" );
74+ opts.maxUnzippedPageSize = iConfig.getUntrackedParameter <unsigned long long >(" maxUnzippedPageSize" );
75+ opts.pageBufferBudget = iConfig.getUntrackedParameter <unsigned long long >(" pageBufferBudget" );
76+ opts.useBufferedWrite = iConfig.getUntrackedParameter <bool >(" useBufferedWrite" );
77+ opts.useDirectIO = iConfig.getUntrackedParameter <bool >(" useDirectIO" );
78+ return opts;
79+ }
80+ } // namespace
81+ } // namespace edm::rntuple_temp
4582namespace edm ::rntuple_temp {
4683 RNTupleTempOutputModule::RNTupleTempOutputModule (ParameterSet const & pset)
4784 : edm::one::OutputModuleBase::OutputModuleBase(pset),
@@ -65,7 +102,14 @@ namespace edm::rntuple_temp {
65102 productDependencies_(),
66103 rootOutputFile_(),
67104 statusFileName_(),
68- overrideGUID_(pset.getUntrackedParameter<std::string>(" overrideGUID" )) {
105+ overrideGUID_(pset.getUntrackedParameter<std::string>(" overrideGUID" )),
106+ noSplitSubFields_(pset.getUntrackedParameterSet(" fieldLevelOptimizations" )
107+ .getUntrackedParameter<std::vector<std::string>>(" noSplitSubFields" )),
108+ overrideStreamer_(
109+ fromConfig (pset.getUntrackedParameterSet(" fieldLevelOptimizations" )
110+ .getUntrackedParameter<std::vector<edm::ParameterSet>>(" overrideDataProductStreamer" ))),
111+ allProductsUseStreamer_(
112+ pset.getUntrackedParameterSet(" fieldLevelOptimizations" ).getUntrackedParameter<bool>(" useStreamer" )) {
69113 if (pset.getUntrackedParameter <bool >(" writeStatusFile" )) {
70114 std::ostringstream statusfilename;
71115 statusfilename << moduleLabel_ << ' _' << getpid ();
@@ -107,9 +151,8 @@ namespace edm::rntuple_temp {
107151
// Constructs an OutputItem for one product.
// Stores the product description pointer and the EDGetToken, initializes product_ to
// nullptr, and records whether this product uses streamer storage (streamerProduct_).
// The lines marked '-' are the previous signature, which took splitLevel and basketSize
// instead of the streamerProduct flag.
108152 RNTupleTempOutputModule::OutputItem::OutputItem (ProductDescription const * bd,
109153 EDGetToken const & token,
110- int splitLevel,
111- int basketSize)
112- : productDescription_(bd), token_(token), product_(nullptr ), splitLevel_(splitLevel), basketSize_(basketSize) {}
154+ bool streamerProduct)
155+ : productDescription_(bd), token_(token), product_(nullptr ), streamerProduct_(streamerProduct) {}
113156
114157 namespace {
115158 std::regex convertBranchExpression (std::string const & iGlobBranchExpression) {
@@ -120,15 +163,6 @@ namespace edm::rntuple_temp {
120163 }
121164 } // namespace
122165
123- inline bool RNTupleTempOutputModule::SpecialSplitLevelForBranch::match (std::string const & iBranchName) const {
124- return std::regex_match (iBranchName, branch_);
125- }
126-
127- std::regex RNTupleTempOutputModule::SpecialSplitLevelForBranch::convert (
128- std::string const & iGlobBranchExpression) const {
129- return convertBranchExpression (iGlobBranchExpression);
130- }
131-
// Tests a branch name against this alias entry's stored pattern.
// Returns the result of std::regex_match on iBranchName against the member regex
// branch_, i.e. true only when the whole name matches.
132166 bool RNTupleTempOutputModule::AliasForBranch::match (std::string const & iBranchName) const {
133167 return std::regex_match (iBranchName, branch_);
134168 }
@@ -144,14 +178,12 @@ namespace edm::rntuple_temp {
144178
145179 // Fill outputItemList with an entry for each branch.
// Adds one OutputItem per kept product. For InProcess branches, products whose process
// name differs from processName are skipped.
146180 for (auto const & kept : keptVector) {
147- int splitLevel = ProductDescription::invalidSplitLevel;
148- int basketSize = ProductDescription::invalidBasketSize;
149-
150181 ProductDescription const & prod = *kept.first ;
151182 if (branchType == InProcess && processName != prod.processName ()) {
152183 continue ;
153184 }
154- outputItemList.emplace_back (&prod, kept.second , splitLevel, basketSize);
// A matching per-product override decides the setting; the module-wide flag is only the
// fallback when no override matches. The optional must be unwrapped with value_or():
// using it directly as an operand of `or` converts it through has_value(), so an
// override that explicitly says useStreamer=false would switch streamer storage ON.
185+ bool streamerProduct = useStreamer (prod.branchName (), overrideStreamer_).value_or (allProductsUseStreamer_);
186+ outputItemList.emplace_back (&prod, kept.second , streamerProduct);
155187 }
156188 }
157189
@@ -463,6 +495,26 @@ namespace edm::rntuple_temp {
463495 desc.addUntracked (" rntupleWriteOptions" , optimizations)
464496 ->setComment (" Options to control RNTuple specific output features." );
465497 }
// Describes the untracked "fieldLevelOptimizations" parameter set: the list of subfields
// that must not be split, the module-wide useStreamer flag (default false), and the
// VPSet of per-product streamer overrides (each with a product name expression and a
// useStreamer flag defaulting to true).
498+ {
499+ ParameterSetDescription fieldLevel;
500+ fieldLevel.addUntracked <std::vector<std::string>>(" noSplitSubFields" , {})
501+ ->setComment (
502+ " fully qualified subfield names for fields which should not be split. A single value of 'all' means all "
503+ " possible subfields will be unsplit" );
504+ fieldLevel.addUntracked <bool >(" useStreamer" , false )
505+ ->setComment (" Use streamer storage for top level fields when storing data products" );
506+
507+ {
508+ ParameterSetDescription specialStreamer;
// The help text names the setting this entry actually controls (streamer usage); the
// earlier wording referred to a split setting.
509+ specialStreamer.addUntracked <std::string>(" product" )->setComment (
510+ " Name of data product needing a special streamer setting. The name can contain wildcards '*' and '?'" );
511+ specialStreamer.addUntracked <bool >(" useStreamer" , true )
512+ ->setComment (" Explicitly set if should or should not use streamer (default is to use streamer)" );
513+ fieldLevel.addVPSetUntracked (" overrideDataProductStreamer" , specialStreamer, {});
514+ }
515+ desc.addUntracked (" fieldLevelOptimizations" , fieldLevel)
516+ ->setComment (" Options to control specializing how Fields are stored." );
517+ }
466518 OutputModule::fillDescription (desc);
467519 }
468520
0 commit comments