@@ -102,8 +102,9 @@ void InsertBranchName(std::set<std::string> &bNamesReg, std::vector<std::string>
102102 foundLeaves.insert (leaf);
103103}
104104
105- void ExploreBranch (TTree &t, std::set<std::string> &bNamesReg, std::vector<std::string> &bNames, TBranch *b,
106- std::string prefix, std::string &friendName, bool allowDuplicates)
105+ void ExploreBranch (TTree &t, std::unordered_map<std::string, unsigned int > &duplicateTokens,
106+ std::set<std::string> &bNamesReg, std::vector<std::string> &bNames, TBranch *b, std::string prefix,
107+ std::string &friendName, bool allowDuplicates)
107108{
108109 // We want to avoid situations of overlap between the prefix and the
109110 // sub-branch name that might happen when the branch is composite, e.g.
@@ -121,6 +122,15 @@ void ExploreBranch(TTree &t, std::set<std::string> &bNamesReg, std::vector<std::
121122 for (auto sb : *b->GetListOfBranches ()) {
122123 TBranch *subBranch = static_cast <TBranch *>(sb);
123124 auto subBranchName = std::string (subBranch->GetName ());
125+
126+ // Record names of sub-branches, which could reappear in different branch hierarchies of the same dataset. For
127+ // example, the 'Muon' branch could have sub-branch 'pt', as well as the 'Electron' branch. Later we will
128+ // disambiguate by removing the top-level 'pt' branch which TTree doesn't warn about and would end up pointing to
129+ // the first sub-branch encountered in this exploration
130+ if (!duplicateTokens.insert ({subBranchName, 1 }).second ) {
131+ duplicateTokens[subBranchName]++;
132+ }
133+
124134 auto fullName = prefix + subBranchName;
125135
126136 if (auto subNameFirstDot = subBranchName.find_first_of (' .' ); subNameFirstDot != std::string::npos) {
@@ -133,7 +143,7 @@ void ExploreBranch(TTree &t, std::set<std::string> &bNamesReg, std::vector<std::
133143 if (!prefix.empty ())
134144 newPrefix = fullName + " ." ;
135145
136- ExploreBranch (t, bNamesReg, bNames, subBranch, newPrefix, friendName, allowDuplicates);
146+ ExploreBranch (t, duplicateTokens, bNamesReg, bNames, subBranch, newPrefix, friendName, allowDuplicates);
137147
138148 auto branchDirectlyFromTree = t.GetBranch (fullName.c_str ());
139149 if (!branchDirectlyFromTree)
@@ -147,7 +157,8 @@ void ExploreBranch(TTree &t, std::set<std::string> &bNamesReg, std::vector<std::
147157 }
148158}
149159
150- void GetBranchNamesImpl (TTree &t, std::set<std::string> &bNamesReg, std::vector<std::string> &bNames,
160+ void GetBranchNamesImpl (TTree &t, std::unordered_map<std::string, unsigned int > &duplicateTokens,
161+ std::set<std::string> &bNamesReg, std::vector<std::string> &bNames,
151162 std::set<TTree *> &analysedTrees, std::string &friendName, bool allowDuplicates)
152163{
153164 std::set<TLeaf *> foundLeaves;
@@ -184,7 +195,7 @@ void GetBranchNamesImpl(TTree &t, std::set<std::string> &bNamesReg, std::vector<
184195 }
185196 } else if (branch->IsA () == TBranchObject::Class ()) {
186197 // TBranchObject
187- ExploreBranch (t, bNamesReg, bNames, branch, branchName + " ." , friendName, allowDuplicates);
198+ ExploreBranch (t, duplicateTokens, bNamesReg, bNames, branch, branchName + " ." , friendName, allowDuplicates);
188199 InsertBranchName (bNamesReg, bNames, branchName, friendName, allowDuplicates);
189200 } else {
190201 // TBranchElement
@@ -199,9 +210,10 @@ void GetBranchNamesImpl(TTree &t, std::set<std::string> &bNamesReg, std::vector<
199210 dotIsImplied = true ;
200211
201212 if (dotIsImplied || branchName.back () == ' .' )
202- ExploreBranch (t, bNamesReg, bNames, branch, " " , friendName, allowDuplicates);
213+ ExploreBranch (t, duplicateTokens, bNamesReg, bNames, branch, " " , friendName, allowDuplicates);
203214 else
204- ExploreBranch (t, bNamesReg, bNames, branch, branchName + " ." , friendName, allowDuplicates);
215+ ExploreBranch (t, duplicateTokens, bNamesReg, bNames, branch, branchName + " ." , friendName,
216+ allowDuplicates);
205217
206218 InsertBranchName (bNamesReg, bNames, branchName, friendName, allowDuplicates);
207219 }
@@ -226,19 +238,32 @@ void GetBranchNamesImpl(TTree &t, std::set<std::string> &bNamesReg, std::vector<
226238 else
227239 frName = std::string (friendTree->GetName ());
228240
229- GetBranchNamesImpl (*friendTree, bNamesReg, bNames, analysedTrees, frName, allowDuplicates);
241+ GetBranchNamesImpl (*friendTree, duplicateTokens, bNamesReg, bNames, analysedTrees, frName, allowDuplicates);
230242 }
231243}
232244
233245// /////////////////////////////////////////////////////////////////////////////
234246// / Get all the branch names, including those of the friend trees
235247std::vector<std::string> RetrieveDatasetSchema (TTree &t, bool allowDuplicates = true )
236248{
249+ std::unordered_map<std::string, unsigned int > duplicateTokens;
250+
237251 std::set<std::string> bNamesSet;
238252 std::vector<std::string> bNames;
239253 std::set<TTree *> analysedTrees;
240254 std::string emptyFrName = " " ;
241- GetBranchNamesImpl (t, bNamesSet, bNames, analysedTrees, emptyFrName, allowDuplicates);
255+ GetBranchNamesImpl (t, duplicateTokens, bNamesSet, bNames, analysedTrees, emptyFrName, allowDuplicates);
256+
257+ // Remove all sub-branches that have duplicate names between different branch hierarchies of the dataset
258+ bNames.erase (std::remove_if (bNames.begin (), bNames.end (),
259+ [&duplicateTokens](const auto &name) {
260+ if (auto it = duplicateTokens.find (name);
261+ it != duplicateTokens.end () && it->second > 1 )
262+ return true ;
263+ return false ;
264+ }),
265+ bNames.end ());
266+
242267 return bNames;
243268}
244269} // namespace
0 commit comments