Skip to content

Commit ad12e52

Browse files
committed
DNN v3 with timing info, corrected seed pT
1 parent aec8400 commit ad12e52

File tree

4 files changed

+166
-60
lines changed

4 files changed

+166
-60
lines changed

RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
// Author: Theo Cuisset - [email protected]
33
// Date: 11/2023
44

5+
// Modified by Gamze Sokmen
6+
// Changes: Implemented the delta-time feature under a new DNN input version (v3) for the superclustering DNN and corrected the seed pT calculation.
7+
// Date: 07/2025
8+
59
#ifndef __RecoHGCal_TICL_SuperclusteringDNNInputs_H__
610
#define __RecoHGCal_TICL_SuperclusteringDNNInputs_H__
711

@@ -87,6 +91,38 @@ namespace ticl {
8791
}
8892
};
8993

94+
/* Third version of DNN by Gamze Sokmen, making use of time information as new variables.
95+
Uses features : ['DeltaEta', 'DeltaPhi', 'multi_en', 'multi_eta', 'multi_pt', 'seedEta', 'seedPhi', 'seedEn', 'seedPt', 'theta', 'theta_xz_seedFrame', 'theta_yz_seedFrame', 'theta_xy_cmsFrame', 'theta_yz_cmsFrame', 'theta_xz_cmsFrame', 'explVar', 'explVarRatio', 'mod_deltaTime']
96+
*/
97+
98+
// Version "v3" of the superclustering DNN input description.
// Overrides the AbstractSuperclusteringDNNInput interface with an 18-feature layout whose
// last feature is "mod_deltaTime"; the first 17 names are listed in featureNames() below.
class SuperclusteringDNNInputV3 : public AbstractSuperclusteringDNNInput {
99+
public:
100+
// Number of features per input vector; equals the number of names returned by featureNames().
unsigned int featureCount() const override { return 18; }
101+
102+
// Declaration only: the definition is not part of this view.
// NOTE(review): presumably returns featureCount() values in the same order as featureNames()
// for the (ts_base, ts_toCluster) trackster pair; confirm against the implementation.
std::vector<float> computeVector(ticl::Trackster const& ts_base, ticl::Trackster const& ts_toCluster) override;
103+
104+
// Names of the 18 features, one string per feature.
std::vector<std::string> featureNames() const override {
105+
return {"DeltaEtaBaryc",
106+
"DeltaPhiBaryc",
107+
"multi_en",
108+
"multi_eta",
109+
"multi_pt",
110+
"seedEta",
111+
"seedPhi",
112+
"seedEn",
113+
"seedPt",
114+
"theta",
115+
"theta_xz_seedFrame",
116+
"theta_yz_seedFrame",
117+
"theta_xy_cmsFrame",
118+
"theta_yz_cmsFrame",
119+
"theta_xz_cmsFrame",
120+
"explVar",
121+
"explVarRatio",
122+
"mod_deltaTime"};
123+
}
124+
};
125+
90126
std::unique_ptr<AbstractSuperclusteringDNNInput> makeSuperclusteringDNNInputFromString(std::string dnnVersion);
91127
} // namespace ticl
92128

RecoHGCal/TICL/plugins/SuperclusteringSampleDumper.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ void SuperclusteringSampleDumper::analyze(const edm::Event& evt, const edm::Even
151151
std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0);
152152
std::stable_sort(
153153
trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) {
154-
return (*inputTracksters)[i1].raw_pt() > (*inputTracksters)[i2].raw_pt();
154+
return (*inputTracksters)[i1].raw_energy()*std::sin((*inputTracksters)[i1].barycenter().Theta()) > (*inputTracksters)[i2].raw_energy()*std::sin((*inputTracksters)[i2].barycenter().Theta());
155155
});
156156

157157
// Order of loops is reversed compared to SuperclusteringProducer (here outer is seed, inner is candidate), for performance reasons.
@@ -162,7 +162,7 @@ void SuperclusteringSampleDumper::analyze(const edm::Event& evt, const edm::Even
162162
trackstersIndicesPt[ts_seed_idx_pt]; // Index of seed trackster in input collection (not in pT sorted collection)
163163
Trackster const& ts_seed = (*inputTracksters)[ts_seed_idx_input];
164164

165-
if (ts_seed.raw_pt() < seedPtThreshold_)
165+
if (ts_seed.raw_energy()*std::sin(ts_seed.barycenter().Theta()) < seedPtThreshold_)
166166
break; // All further seeds will have lower pT than threshold (due to pT sorting)
167167

168168
if (!checkExplainedVarianceRatioCut(ts_seed))
@@ -265,8 +265,8 @@ void SuperclusteringSampleDumper::fillDescriptions(edm::ConfigurationDescription
265265
->setComment("Input trackster collection, same as what is used for superclustering inference.");
266266
desc.add<edm::InputTag>("recoToSimAssociatorCP",
267267
edm::InputTag("tracksterSimTracksterAssociationLinkingbyCLUE3D", "recoToSim"));
268-
desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true),
269-
edm::allowedValues<std::string>("v1", "v2"))
268+
desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v3", true),
269+
edm::allowedValues<std::string>("v1", "v2", "v3"))
270270
->setComment(
271271
"DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN.");
272272
// Cuts are intentionally looser than those used for inference in TracksterLinkingBySuperClustering.cpp

RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.cc

Lines changed: 62 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters(
116116
std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0);
117117
std::stable_sort(
118118
trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) {
119-
return inputTracksters[i1].raw_pt() > inputTracksters[i2].raw_pt();
119+
return inputTracksters[i1].raw_energy()*std::sin(inputTracksters[i1].barycenter().Theta()) > inputTracksters[i2].raw_energy()*std::sin(inputTracksters[i2].barycenter().Theta());
120120
});
121121

122122
/* Evaluate in minibatches since running with trackster count = 3000 leads to a short-lived ~15GB memory allocation
@@ -146,7 +146,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters(
146146
Trackster const& ts_cand = inputTracksters[trackstersIndicesPt[ts_cand_idx_pt]];
147147

148148
if (ts_cand.raw_energy() < candidateEnergyThreshold_ ||
149-
!checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand)
149+
// !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand)
150+
!checkExplainedVarianceRatioCut(ts_cand))// || !trackstersPassesPIDCut(ts_cand))
150151
continue;
151152

152153
auto& tracksterTiles = tracksterTilesBothEndcaps_pt[ts_cand.barycenter().eta() > 0];
@@ -164,7 +165,7 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters(
164165

165166
Trackster const& ts_seed = inputTracksters[trackstersIndicesPt[ts_seed_idx_pt]];
166167

167-
if (ts_seed.raw_pt() < seedPtThreshold_)
168+
if (ts_seed.raw_energy()*std::sin(ts_seed.barycenter().Theta()) < seedPtThreshold_)
168169
break; // All further seeds will have lower pT than threshold (due to pT sorting)
169170

170171
if (!checkExplainedVarianceRatioCut(ts_seed) || !trackstersPassesPIDCut(ts_seed))
@@ -245,87 +246,95 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters(
245246
Also mask seeds (only needed to add tracksters not in a supercluster to the output). */
246247
std::vector<bool> tracksterMask(tracksterCount, false);
247248

248-
/* Index of the seed trackster of the previous iteration
249-
Initialized with an id that cannot be obtained in input */
249+
250+
/////////////////////////////////////////////////////////////////////////TRKBUILDINGMOD
251+
250252
unsigned int previousCandTrackster_idx = std::numeric_limits<unsigned int>::max();
251253
unsigned int bestSeedForCurrentCandidate_idx = std::numeric_limits<unsigned int>::max();
252254
float bestSeedForCurrentCandidate_dnnScore = nnWorkingPoint_;
253-
254-
// Lambda to be called when there is a transition from one candidate to the next (as well as after the last iteration)
255-
// Does the actual supercluster creation
255+
256+
// Track which tracksters were ever used as candidates
257+
std::vector<bool> usedAsCandidate(tracksterCount, false);
258+
259+
256260
auto onCandidateTransition = [&](unsigned ts_cand_idx) {
257-
if (bestSeedForCurrentCandidate_idx <
258-
std::numeric_limits<unsigned int>::max()) { // At least one seed can be superclustered with the candidate
259-
tracksterMask[ts_cand_idx] = true; // Mask the candidate so it is not considered as seed in later iterations
260-
261-
// Look for a supercluster of the seed
262-
std::vector<std::vector<unsigned int>>::iterator seed_supercluster_it =
263-
std::find_if(outputSuperclusters.begin(),
264-
outputSuperclusters.end(),
265-
[bestSeedForCurrentCandidate_idx](std::vector<unsigned int> const& sc) {
266-
return sc[0] == bestSeedForCurrentCandidate_idx;
267-
});
268-
269-
if (seed_supercluster_it == outputSuperclusters.end()) { // No supercluster exists yet for the seed. Create one.
270-
outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx});
271-
resultTracksters.emplace_back(inputTracksters[bestSeedForCurrentCandidate_idx]);
272-
linkedTracksterIdToInputTracksterId.emplace_back(
273-
std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx});
274-
seed_supercluster_it = outputSuperclusters.end() - 1;
275-
tracksterMask[bestSeedForCurrentCandidate_idx] =
276-
true; // mask the seed as well (needed to find tracksters not in any supercluster)
261+
if (bestSeedForCurrentCandidate_idx < std::numeric_limits<unsigned int>::max()) {
262+
tracksterMask[ts_cand_idx] = true; // Mask the candidate so it’s not reused as a seed
263+
usedAsCandidate[ts_cand_idx] = true;
264+
265+
// Find the supercluster the seed belongs to (even if it's already used in another supercluster)
266+
// Find existing supercluster for the seed
267+
auto seed_supercluster_it =
268+
std::find_if(outputSuperclusters.begin(),
269+
outputSuperclusters.end(),
270+
[bestSeedForCurrentCandidate_idx](const std::vector<unsigned int>& sc) {
271+
return sc[0] == bestSeedForCurrentCandidate_idx;
272+
});
273+
if (seed_supercluster_it == outputSuperclusters.end()) {
274+
// No supercluster exists for this seed, create one
275+
outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx});
276+
resultTracksters.emplace_back(inputTracksters[bestSeedForCurrentCandidate_idx]);
277+
linkedTracksterIdToInputTracksterId.emplace_back(
278+
std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx});
279+
seed_supercluster_it = outputSuperclusters.end() - 1;
280+
tracksterMask[bestSeedForCurrentCandidate_idx] = true;
277281
}
278-
// Index of the supercluster into resultTracksters, outputSuperclusters and linkedTracksterIdToInputTracksterId collections (the indices are the same)
282+
279283
unsigned int indexIntoOutputTracksters = seed_supercluster_it - outputSuperclusters.begin();
280284
seed_supercluster_it->push_back(ts_cand_idx);
281285
resultTracksters[indexIntoOutputTracksters].mergeTracksters(inputTracksters[ts_cand_idx]);
282286
linkedTracksterIdToInputTracksterId[indexIntoOutputTracksters].push_back(ts_cand_idx);
283-
287+
284288
assert(outputSuperclusters.size() == resultTracksters.size() &&
285-
outputSuperclusters.size() == linkedTracksterIdToInputTracksterId.size());
289+
outputSuperclusters.size() == linkedTracksterIdToInputTracksterId.size());
286290
assert(seed_supercluster_it->size() == linkedTracksterIdToInputTracksterId[indexIntoOutputTracksters].size());
287-
291+
288292
bestSeedForCurrentCandidate_idx = std::numeric_limits<unsigned int>::max();
289293
bestSeedForCurrentCandidate_dnnScore = nnWorkingPoint_;
290294
}
291295
};
292-
293-
//Iterate over minibatches
296+
297+
// Iterate over minibatches
294298
for (unsigned int batchIndex = 0; batchIndex < batchOutputs.size(); batchIndex++) {
295-
std::vector<float> const& currentBatchOutputs = batchOutputs[batchIndex]; // DNN score outputs
296-
// Iterate over seed-candidate pairs inside current minibatch
299+
std::vector<float> const& currentBatchOutputs = batchOutputs[batchIndex];
300+
297301
for (unsigned int indexInBatch = 0; indexInBatch < tracksterIndicesUsedInDNN[batchIndex].size(); indexInBatch++) {
298302
assert(indexInBatch < static_cast<unsigned int>(batchOutputs[batchIndex].size()));
299-
303+
300304
const unsigned int ts_seed_idx = tracksterIndicesUsedInDNN[batchIndex][indexInBatch].first;
301305
const unsigned int ts_cand_idx = tracksterIndicesUsedInDNN[batchIndex][indexInBatch].second;
302306
const float currentDnnScore = currentBatchOutputs[indexInBatch];
303-
307+
304308
if (previousCandTrackster_idx != std::numeric_limits<unsigned int>::max() &&
305-
ts_cand_idx != previousCandTrackster_idx) {
306-
// There is a transition from one seed to the next (don't make a transition for the first iteration)
307-
onCandidateTransition(previousCandTrackster_idx);
309+
ts_cand_idx != previousCandTrackster_idx) {
310+
onCandidateTransition(previousCandTrackster_idx);
308311
}
309-
310-
if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !tracksterMask[ts_seed_idx]) {
311-
// Check that the DNN suggests superclustering, that this seed-candidate assoc is better than previous ones, and that the seed is not already in a supercluster as candidate
312-
bestSeedForCurrentCandidate_idx = ts_seed_idx;
313-
bestSeedForCurrentCandidate_dnnScore = currentDnnScore;
312+
313+
// Ignore seed if it was previously used as a candidate
314+
if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !usedAsCandidate[ts_seed_idx]) {
315+
bestSeedForCurrentCandidate_idx = ts_seed_idx;
316+
bestSeedForCurrentCandidate_dnnScore = currentDnnScore;
314317
}
318+
315319
previousCandTrackster_idx = ts_cand_idx;
316320
}
317321
}
318322
onCandidateTransition(previousCandTrackster_idx);
319-
320-
// Adding one-trackster superclusters for all tracksters not in a supercluster already that pass the seed threshold
323+
324+
// Create singleton superclusters for unused tracksters with enough pt
321325
for (unsigned int ts_id = 0; ts_id < tracksterCount; ts_id++) {
322-
if (!tracksterMask[ts_id] && inputTracksters[ts_id].raw_pt() >= seedPtThreshold_) {
326+
if (!tracksterMask[ts_id] && inputTracksters[ts_id].raw_energy()*std::sin(inputTracksters[ts_id].barycenter().Theta()) >= seedPtThreshold_) {
323327
outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{ts_id});
324328
resultTracksters.emplace_back(inputTracksters[ts_id]);
325329
linkedTracksterIdToInputTracksterId.emplace_back(std::initializer_list<unsigned int>{ts_id});
326330
}
327331
}
328-
332+
333+
334+
335+
/////////////////////////////////////////////////////////////////////////TRKBUILDINGMOD
336+
337+
329338
#ifdef EDM_ML_DEBUG
330339
for (std::vector<unsigned int> const& sc : outputSuperclusters) {
331340
std::ostringstream s;
@@ -340,8 +349,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters(
340349
void TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSetDescription& desc) {
341350
TracksterLinkingAlgoBase::fillPSetDescription(desc); // adds algo_verbosity
342351
desc.add<edm::FileInPath>("onnxModelPath")->setComment("Path to DNN (as ONNX model)");
343-
desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true),
344-
edm::allowedValues<std::string>("v1", "v2"))
352+
desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v3", true),
353+
edm::allowedValues<std::string>("v1", "v2", "v3"))
345354
->setComment(
346355
"DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN.");
347356
desc.add<unsigned int>("inferenceBatchSize", 1e5)
@@ -379,4 +388,4 @@ void TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSet
379388
{static_cast<int>(Trackster::ParticleType::photon), static_cast<int>(Trackster::ParticleType::electron)})
380389
->setComment("List of PID particle types (ticl::Trackster::ParticleType enum) to consider for PID filtering");
381390
desc.add<double>("PIDThreshold", 0.8)->setComment("PID score threshold");
382-
}
391+
}

0 commit comments

Comments
 (0)