Fix technical issues (strict zip, indentation, line wrapping, unused variable, quote style)

qgp · qgp · commit cefa0526c0fc · 2025-04-10T14:30:58.000+02:00
diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py
@@ -117,7 +117,7 @@ def __init__(self, datap, case, typean, period):
                 for param, symbol in zip(
                     ("mean", "sigma", "significance", "chi2"),
                     ("#it{#mu}", "#it{#sigma}", "significance", "#it{#chi}^{2}"),
-                    strict=False,
+                    strict=True,
                 )
             }
             for level in self.fit_levels
@@ -703,7 +703,7 @@ def _subtract_sideband(self, hist, var, mcordata, ipt):
             hx = project_hist(fh_sideband, (0,), {}) if get_dim(fh_sideband) > 1 else fh_sideband
             for iptjet in bins_ptjet:
                 if iptjet and hx.GetBinContent(iptjet) <= 0:
-                        continue
+                    continue
                 rws = self.roo_ws.get((mcordata, iptjet, ipt))
                 if not rws:
                     self.logger.error("Falling back to incl. roows for %s-iptjet%i-ipt%i", mcordata, iptjet, ipt)
@@ -1060,7 +1060,6 @@ def _extract_signal(self, hist, var, mcordata, ipt):
         # hres.Sumw2() # TODO: check if we should do this here
         return hres
 
-
     def estimate_feeddown(self):
         """Estimate feeddown from legacy Run 2 trees or gen-only simulation"""
         match self.cfg("fd_input", "tree"):
@@ -1096,7 +1095,9 @@ def estimate_feeddown(self):
                     colname = col_mapping.get(var, f"{var}_jet")
                     if f"{colname}" not in df:
                         if var is not None:
-                            self.logger.error("No feeddown information for %s (%s), cannot estimate feeddown", var, colname)
+                            self.logger.error(
+                                "No feeddown information for %s (%s), cannot estimate feeddown", var, colname
+                            )
                             # print(df.info(), flush=True)
                         continue
 
@@ -1109,7 +1110,9 @@ def estimate_feeddown(self):
                         bins_obs[var],
                     )
                     fill_hist_fast(h3_fd_gen_orig[var], df[["pt_jet", "pt_cand", f"{colname}"]])
-                    self._save_hist(project_hist(h3_fd_gen_orig[var], [0, 2], {}), f"fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png")
+                    self._save_hist(
+                        project_hist(h3_fd_gen_orig[var], [0, 2], {}), f"fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png"
+                    )
 
             case "sim":
                 # TODO: recover cross section
@@ -1121,8 +1124,11 @@ def estimate_feeddown(self):
                         if fh := rfile.Get(f"h_mass-ptjet-pthf{label}"):
                             h3_fd_gen_orig[var] = project_hist(fh, list(range(1, get_dim(fh))), {})
                             ensure_sumw2(h3_fd_gen_orig[var])
-                            self._save_hist(project_hist(h3_fd_gen_orig[var], [0, 2], {}), f"fd/h_ptjet-{var}_feeddown_genonly_noeffscaling.png")
-                powheg_xsection_scale_factor = 1. # FIXME: retrieve cross section
+                            self._save_hist(
+                                project_hist(h3_fd_gen_orig[var], [0, 2], {}),
+                                f"fd/h_ptjet-{var}_feeddown_genonly_noeffscaling.png",
+                            )
+                powheg_xsection_scale_factor = 1.0  # FIXME: retrieve cross section
 
             case fd_input:
                 self.logger.critical("Invalid feeddown input %s", fd_input)
diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py
@@ -83,8 +83,10 @@ def __init__(
         self.s_evtsel = datap["analysis"][self.typean]["evtsel"]
 
         # bins: 2d array [[low, high], ...]
-        self.bins_skimming = np.array(list(zip(self.lpt_anbinmin, self.lpt_anbinmax)), "d")  # TODO: replace with cfg
-        self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax)), "d")
+        self.bins_skimming = np.array(
+            list(zip(self.lpt_anbinmin, self.lpt_anbinmax, strict=True)), "d"
+        )  # TODO: replace with cfg
+        self.bins_analysis = np.array(list(zip(self.lpt_finbinmin, self.lpt_finbinmax, strict=True)), "d")
 
         # skimming bins in overlap with the analysis range
         self.active_bins_skim = [
@@ -143,7 +145,7 @@ def _verify_variables(self, dfi):
         for idx, row in df.iterrows():
             isSoftDropped = False
             nsd = 0
-            for zg, theta in zip(row["zg_array"], row["fTheta"]):
+            for zg, theta in zip(row["zg_array"], row["fTheta"], strict=True):
                 if zg >= self.cfg("zcut", 0.1):
                     if not isSoftDropped:
                         df.loc[idx, "zg"] = zg
@@ -178,7 +180,12 @@ def _calculate_variables(self, df, verify=False):  # pylint: disable=invalid-nam
                 df["zg"] = df["zg_array"].apply(lambda ar: next((zg for zg in ar if zg >= zcut), -0.1))
             if "rg" in observables:
                 df["rg"] = df[["zg_array", "fTheta"]].apply(
-                    (lambda ar: next((rg for (zg, rg) in zip(ar.zg_array, ar.fTheta) if zg >= zcut), -0.1)), axis=1
+                    (
+                        lambda ar: next(
+                            (rg for (zg, rg) in zip(ar.zg_array, ar.fTheta, strict=True) if zg >= zcut), -0.1
+                        )
+                    ),
+                    axis=1,
                 )
             if "nsd" in observables:
                 df["nsd"] = df["zg_array"].apply(lambda ar: len([zg for zg in ar if zg >= zcut]))
@@ -232,7 +239,7 @@ def process_histomass_single(self, index):
         self.logger.info("Processing (histomass) %s", self.l_evtorig[index])
 
         print(f"Opening file {self.l_histomass[index]}", flush=True)
-        with TFile.Open(self.l_histomass[index], "recreate") as rfile:
+        with TFile.Open(self.l_histomass[index], "recreate") as _:
             dfevtorig = read_df(self.l_evtorig[index])
             histonorm = TH1F("histonorm", "histonorm", 4, 0, 4)
             histonorm.SetBinContent(1, len(dfquery(dfevtorig, self.s_evtsel)))
@@ -256,7 +263,7 @@ def process_histomass_single(self, index):
             get_axis(histonorm, 0).SetBinLabel(4, "N_{BC}^{TVX}")
             histonorm.Write()
 
-            if self.datatype != 'fd':
+            if self.datatype != "fd":
                 df = pd.concat(read_df(self.mptfiles_recosk[bin][index]) for bin in self.active_bins_skim)
             else:
                 df = pd.concat(read_df(self.mptfiles_gensk[bin][index]) for bin in self.active_bins_skim)
@@ -489,7 +496,10 @@ def process_efficiency_single(self, index):
             }
 
             for cat in cats:
-                print(f"Filling histograms for {cat}: {dfgen[cat].info()}, {dfdet[cat].info()}, {dfmatch[cat].info()}", flush=True)
+                print(
+                    f"Filling histograms for {cat}: {dfgen[cat].info()}, {dfdet[cat].info()}, {dfmatch[cat].info()}",
+                    flush=True,
+                )
                 fill_hist(h_eff[(cat, "gen")], dfgen[cat][["fJetPt_gen", "fPt_gen"]])
                 fill_hist(h_eff[(cat, "det")], dfdet[cat][["fJetPt", "fPt"]])
                 if cat in dfmatch and dfmatch[cat] is not None: