-
Notifications
You must be signed in to change notification settings - Fork 429
Add codegen to RooParametricHist #1217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -306,6 +306,167 @@ | |
| return result > 0. ? result : integralFloorVal; | ||
| } | ||
|
|
||
| inline int parametricHistFindBin(const int N_bins, const double* bins, const double x) { | ||
| if (x < bins[0] || x >= bins[N_bins]) | ||
| return -1; | ||
|
|
||
| // Search for the bin | ||
| for (int i = 0; i < N_bins; ++i) { | ||
| if (x >= bins[i] && x < bins[i + 1]) | ||
| return i; | ||
| } | ||
| return -1; | ||
| } | ||
|
|
||
| inline int parametricHistFindBin(const int N_bins, std::vector<double> const& bins, const double x) { | ||
| return parametricHistFindBin(N_bins, bins.data(), x); | ||
| } | ||
|
|
||
| inline Double_t parametricHistMorphScale(const double parVal, | ||
| const int nMorphs, | ||
| const double* morphCoeffs, | ||
| const double* morphDiffs, | ||
| const double* morphSums, | ||
| double smoothRegion) { | ||
| double morphScale = 1.0; | ||
| if (!morphDiffs || !morphSums) | ||
| return morphScale; | ||
| for (int i = 0; i < nMorphs; ++i) { | ||
| double coeff = morphCoeffs[i]; | ||
| double a = 0.5 * coeff; | ||
| double b = smoothStepFunc(coeff, smoothRegion); | ||
| morphScale *= 1 + (1.0 / parVal) * a * (morphDiffs[i] + b * morphSums[i]); | ||
| } | ||
| return morphScale; | ||
| } | ||
|
|
||
| inline Double_t parametricHistEvaluate(const int bin_i, | ||
| const double* parVals, | ||
| const double* bins, | ||
| const int N_bins, | ||
| const double* morphCoeffs, | ||
| const int nMorphs, | ||
| const double* morphDiffs, | ||
| const double* morphSums, | ||
| const double* widths, | ||
| const double smoothRegion) { | ||
| if (bin_i < 0) | ||
| return 0.0; | ||
| // Morphing case | ||
| if (morphCoeffs != nullptr && nMorphs > 0) { | ||
| // morphDiffs and morphSums are flattened arrays of size N_bins * nMorphs | ||
| const double* binMorphDiffs = nullptr; | ||
| const double* binMorphSums = nullptr; | ||
| if (morphDiffs) { | ||
| binMorphDiffs = morphDiffs + bin_i * nMorphs; | ||
| } | ||
| if (morphSums) { | ||
| binMorphSums = morphSums + bin_i * nMorphs; | ||
| } | ||
| double parVal = parVals[bin_i]; | ||
| double scale = parametricHistMorphScale(parVal, nMorphs, morphCoeffs, binMorphDiffs, binMorphSums, smoothRegion); | ||
| return (parVal * scale) / widths[bin_i]; | ||
| } | ||
| // No morphing case | ||
| return parVals[bin_i] / widths[bin_i]; | ||
| } | ||
|
|
||
| inline Double_t parametricMorphFunction(const int j, | ||
| const double parVal, | ||
| const bool hasMorphs, | ||
| const int nMorphs, | ||
| const double* morphCoeffs, | ||
| const double* morphDiffs, | ||
| const double* morphSums, | ||
| double smoothRegion) { | ||
| double morphScale = 1.0; | ||
| if (!hasMorphs) | ||
| return morphScale; | ||
|
|
||
| int ndim = nMorphs; | ||
| // apply all morphs one by one to the bin | ||
| // almost certaintly a faster way to do this in a vectorized way .... | ||
| for (int i = 0; i < ndim; ++i) { | ||
| double x = morphCoeffs[i]; | ||
| double a = 0.5 * x, b = smoothStepFunc(x, smoothRegion); | ||
| morphScale *= 1 + (1. / parVal) * a * (morphDiffs[j * nMorphs + i] + b * morphSums[j * nMorphs + i]); | ||
| } | ||
| return morphScale; | ||
| } | ||
|
|
||
| inline Double_t parametricHistFullSum(const double* parVals, | ||
| const int nBins, | ||
| const bool hasMorphs, | ||
| const int nMorphs, | ||
| const double* morphCoeffs, | ||
| const double* morphDiffs, | ||
| const double* morphSums, | ||
| double smoothRegion) { | ||
| double sum = 0; | ||
| for (int i = 0; i < nBins; ++i) { | ||
| double thisVal = parVals[i]; | ||
| if (hasMorphs) { | ||
| // Apply morphing to this bin, just like in RooParametricHist::evaluate | ||
| thisVal *= | ||
| parametricMorphFunction(i, thisVal, hasMorphs, nMorphs, morphCoeffs, morphDiffs, morphSums, smoothRegion); | ||
| } | ||
| sum += thisVal; | ||
| } | ||
| return sum; | ||
| } | ||
|
|
||
| inline Double_t parametricHistIntegral(const double* parVals, | ||
| const double* bins, | ||
| const int N_bins, | ||
| const double* morphCoeffs, | ||
| const int nMorphs, | ||
| const double* morphDiffs, | ||
| const double* morphSums, | ||
| const double* widths, | ||
| const double smoothRegion, | ||
| const char* rangeName, | ||
| const double xmin, | ||
| const double xmax) { | ||
| // No ranges | ||
| if (!rangeName) { | ||
| return parametricHistFullSum( | ||
| parVals, N_bins, morphCoeffs != nullptr, nMorphs, morphCoeffs, morphDiffs, morphSums, smoothRegion); | ||
| } | ||
|
|
||
| // Case with ranges, calculate integral explicitly | ||
| double sum = 0; | ||
| int i; | ||
| for (i = 1; i <= N_bins; i++) { | ||
| // Get maybe-morphed bin value | ||
| double binVal = parVals[i - 1] / widths[i - 1]; | ||
| if (morphCoeffs != nullptr) { | ||
| binVal *= parametricMorphFunction( | ||
| i - 1, parVals[i - 1], true, nMorphs, morphCoeffs, morphDiffs, morphSums, smoothRegion); | ||
| } | ||
|
|
||
| if (bins[i - 1] >= xmin && bins[i] <= xmax) { | ||
| // Bin fully in integration domain | ||
| sum += (bins[i] - bins[i - 1]) * binVal; | ||
| } else if (bins[i - 1] < xmin && bins[i] > xmax) { | ||
| // Domain is fully contained in this bin | ||
| sum += (xmax - xmin) * binVal; | ||
| // Exit here, this is the last bin to be processed by construction | ||
| double fullSum = parametricHistFullSum( | ||
| parVals, N_bins, morphCoeffs != nullptr, nMorphs, morphCoeffs, morphDiffs, morphSums, smoothRegion); | ||
| return sum / fullSum; | ||
| } else if (bins[i - 1] < xmin && bins[i] <= xmax && bins[i] > xmin) { | ||
| // Lower domain boundary is in bin | ||
| sum += (bins[i] - xmin) * binVal; | ||
| } else if (bins[i - 1] >= xmin && bins[i] > xmax && bins[i - 1] < xmax) { | ||
| // Upper domain boundary is in bin | ||
| // Exit here, this is the last bin to be processed by construction | ||
| sum += (xmax - bins[i - 1]) * binVal; | ||
| return sum; | ||
| } | ||
| } | ||
| return sum; | ||
|
Comment on lines
+439
to
+467
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's find and read the full parametricHistIntegral function
find . -name "CombineMathFuncs.h" -o -name "CombineMathFuncs.cxx" | head -20Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 114 🏁 Script executed: # Search for the parametricHistIntegral function to understand its full context
rg "parametricHistIntegral" --type=cpp -n -B 5 -A 60 | head -150Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 10752 🏁 Script executed: # Search for analyticalIntegral and related implementations to check the original pattern
rg "analyticalIntegral|getFullSum" --type=cpp -n -B 3 -A 15 | head -200Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 12230 🏁 Script executed: # Search for test cases that use parametricHistIntegral with ranges
rg -n "parametricHistIntegral|RooParametricHist" --type=cpp -B 2 -A 2 | grep -A 5 -B 5 "range\|integral"Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 5005 🏁 Script executed: # Look for git history or comments explaining the normalization logic
rg -n "fullSum|normalize|parametricHistIntegral" --type=h -B 3 -A 3Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 7983 🏁 Script executed: # Check if there are test files or expected behavior documentation
fd -e "test" -o -name "*Test*" -o -name "*test*" | head -20Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 314 🏁 Script executed: # Look at the parametricHistFullSum function to understand what it computes
rg -n "parametricHistFullSum" --type=cpp -B 5 -A 15 | head -80Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 6053 Inconsistent normalization in ranged integral computation. Line 456 normalizes the result by dividing by 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| } // namespace MathFuncs | ||
| } // namespace Detail | ||
| } // namespace RooFit | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| #include "../interface/VerticalInterpHistPdf.h" | ||
| #include "../interface/VerticalInterpPdf.h" | ||
| #include "../interface/CombineMathFuncs.h" | ||
| #include "../interface/RooParametricHist.h" | ||
|
|
||
| #include <RooUniformBinning.h> | ||
|
|
||
|
|
@@ -242,6 +243,33 @@ | |
| arg.quadraticAlgo())); | ||
| } | ||
|
|
||
| void RooFit::Experimental::codegenImpl(RooParametricHist& arg, CodegenContext& ctx) { | ||
| std::vector<double> diffs_flat; | ||
| std::vector<double> sums_flat; | ||
| if (arg.hasMorphs()) { | ||
| arg.getFlattenedMorphs(diffs_flat, sums_flat); | ||
| } | ||
|
|
||
| std::string xName = ctx.getResult(arg.observable()); | ||
| std::stringstream bin_i; | ||
| bin_i << ctx.buildCall("RooFit::Detail::MathFuncs::parametricHistFindBin", arg.getNBins(), arg.getBins(), xName); | ||
| std::stringstream code; | ||
| code << ctx.buildCall("RooFit::Detail::MathFuncs::parametricHistEvaluate", | ||
| bin_i.str(), | ||
| arg.getPars(), | ||
| arg.getBins(), | ||
| arg.getNBins(), | ||
| arg.getCoeffList(), | ||
| static_cast<int>(arg.getCoeffList().size()), | ||
| arg.hasMorphs() ? diffs_flat : std::vector<double>{}, | ||
| arg.hasMorphs() ? sums_flat : std::vector<double>{}, | ||
| arg.getWidths(), | ||
| arg.getSmoothRegion()) + | ||
| ";\n"; | ||
| ctx.addToCodeBody(code.str()); | ||
| ctx.addResult(&arg, code.str()); | ||
| } | ||
|
Comment on lines
+246
to
+271
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's locate and examine the file
find . -name "CombineCodegenImpl.cxx" -type fRepository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 112 🏁 Script executed: # Check how other codegen implementations use addResult
ast-grep --pattern 'ctx.addResult($_, $$$)'Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 2238 🏁 Script executed: # Also search for the specific file and examine the context around the mentioned lines
rg -n "codegenImpl.*RooParametricHist" --type cpp -A 30Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 3093 🏁 Script executed: # Let's examine the FastVerticalInterpHistPdf2 implementation around lines 120-121
sed -n '100,140p' src/CombineCodegenImpl.cxxRepository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 1464 🏁 Script executed: # Let's also check if there are other patterns where code is added to body and result separately
rg "ctx.addToCodeBody|ctx.addResult" src/CombineCodegenImpl.cxx -B 3 -A 1Repository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 1164 🏁 Script executed: # Look at the exact sequence in the problematic function
sed -n '246,271p' src/CombineCodegenImpl.cxxRepository: cms-analysis/HiggsAnalysis-CombinedLimit Length of output: 1222
Store the Proposed fix sketch+ std::string callExpr = ctx.buildCall("RooFit::Detail::MathFuncs::parametricHistEvaluate",
+ bin_i.str(),
+ arg.getPars(),
+ arg.getBins(),
+ arg.getNBins(),
+ arg.getCoeffList(),
+ static_cast<int>(arg.getCoeffList().size()),
+ arg.hasMorphs() ? diffs_flat : std::vector<double>{},
+ arg.hasMorphs() ? sums_flat : std::vector<double>{},
+ arg.getWidths(),
+ arg.getSmoothRegion());
+
+ std::string tmpVar = ctx.getTmpVarName();
std::stringstream code;
- code << ctx.buildCall("RooFit::Detail::MathFuncs::parametricHistEvaluate",
- bin_i.str(),
- arg.getPars(),
- arg.getBins(),
- arg.getNBins(),
- arg.getCoeffList(),
- static_cast<int>(arg.getCoeffList().size()),
- arg.hasMorphs() ? diffs_flat : std::vector<double>{},
- arg.hasMorphs() ? sums_flat : std::vector<double>{},
- arg.getWidths(),
- arg.getSmoothRegion()) +
- ";\n";
- ctx.addToCodeBody(code.str());
- ctx.addResult(&arg, code.str());
+ code << "double " << tmpVar << " = " << callExpr << ";\n";
+ ctx.addToCodeBody(code.str());
+ ctx.addResult(&arg, tmpVar);🤖 Prompt for AI Agents |
||
|
|
||
| std::string RooFit::Experimental::codegenIntegralImpl(VerticalInterpPdf& arg, | ||
| int code, | ||
| const char* rangeName, | ||
|
|
@@ -257,4 +285,31 @@ | |
| arg.quadraticAlgo()); | ||
| } | ||
|
|
||
| std::string RooFit::Experimental::codegenIntegralImpl(RooParametricHist& arg, | ||
| int code, | ||
| const char* rangeName, | ||
| CodegenContext& ctx) { | ||
| std::vector<double> diffs_flat; | ||
| std::vector<double> sums_flat; | ||
| if (arg.hasMorphs()) { | ||
| arg.getFlattenedMorphs(diffs_flat, sums_flat); | ||
| } | ||
| double xMin = rangeName ? arg.getObs().getMin(rangeName) : -1; | ||
| double xMax = rangeName ? arg.getObs().getMax(rangeName) : -1; // if rangeName is null, these values won't be used | ||
|
|
||
| return ctx.buildCall("RooFit::Detail::MathFuncs::parametricHistIntegral", | ||
| arg.getPars(), | ||
| arg.getBins(), | ||
| arg.getNBins(), | ||
| arg.getCoeffList(), | ||
| static_cast<int>(arg.getCoeffList().size()), | ||
| arg.hasMorphs() ? diffs_flat : std::vector<double>{}, | ||
| arg.hasMorphs() ? sums_flat : std::vector<double>{}, | ||
| arg.getWidths(), | ||
| arg.getSmoothRegion(), | ||
| rangeName ? std::string("\"") + rangeName + "\"" : std::string("nullptr"), | ||
| xMin, | ||
| xMax); | ||
| } | ||
|
|
||
| #endif // ROOT_VERSION_CODE >= ROOT_VERSION(6,36,0) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: cms-analysis/HiggsAnalysis-CombinedLimit
Length of output: 3350
🏁 Script executed:
Repository: cms-analysis/HiggsAnalysis-CombinedLimit
Length of output: 311
🏁 Script executed:
Repository: cms-analysis/HiggsAnalysis-CombinedLimit
Length of output: 1386
Add guard for
parVal == 0in parametric morph functions.The functions
parametricHistMorphScale(line 338) andparametricMorphFunction(line 392) both divide byparValwithout checking if it is zero. WhenparVal == 0, this producesinf, and subsequent calculations like(parVal * scale) / widths[bin_i]result inNaN. Add guards to both functions:inline Double_t parametricHistMorphScale(const double parVal, const int nMorphs, const double* morphCoeffs, const double* morphDiffs, const double* morphSums, double smoothRegion) { double morphScale = 1.0; if (!morphDiffs || !morphSums) return morphScale; + if (parVal == 0.0) + return morphScale; for (int i = 0; i < nMorphs; ++i) {Apply the same guard to
parametricMorphFunction(line 374-395) at line 390.📝 Committable suggestion
🤖 Prompt for AI Agents