alisw · saganatt · Oct 9, 2024 · Oct 28, 2024 · Jul 15, 2025 · Jul 17, 2025
diff --git a/machine_learning_hep/scripts-dhadrons/README.md b/machine_learning_hep/scripts-dhadrons/README.md
@@ -0,0 +1,11 @@
+# Helper scripts for inclusive hadron analysis
+
+- adjusting-run2-run3: fix Run 3 plots for comparison with Run 2 results
+- debugging: verify different stages of MLHEP processing
+- merging: merge results from different MLHEP and cut variation runs
+- multitrial: a workflow to perform the multitrial (raw yield) systematics with MLHEP
+- preliminary-plots: scripts to plot invariant mass fits and cut variation results for ALICE preliminaries
+- run-mlhep: automate MLHEP running
+- systematics: obtain various comparison plots, esp. for systematics and final analysis results
+
+See README files in each subfolder.
diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/README.md
@@ -0,0 +1,34 @@
+# Scripts to fix histograms for comparison with Run 2 results
+
+## Add pT bins to extend the x-axis range on the plots
+
+File: `add_pt_bins.py`<br>
+Usage: `python add_pt_bins.py in_file.root histname out_file.root`
+
+ROOT does not make it easy to draw a histogram on a plot whose x-axis extends beyond the histogram's first and last bin edges.
+
+This script takes the `histname` histogram from `in_file.root` and creates a new histogram with added bins [0.0, `histname`'s minimum) and [`histname`'s maximum, 24.0). The values `0.0` and `24.0` can be changed in the Python code. The new histogram has marker and line styles copied from the old one, and is saved in `out_file.root`.
+
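+You can uncomment the block below the comment "Formula for merging 2 bins" in the script to get a formula for merging 2 bins; the block uses `math.sqrt`, so `import math` must also be added at the top of the script. You need to adjust the indices of bins to merge. This is useful if you want to compare `histname` against less granular results from elsewhere.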
+
+## Restrict the maximum of x-axis
+
+File: `remove_high_pt.py`<br>
+Usage: `python remove_high_pt.py in_file.root histname out_file.root maxval`
+
+This script does the opposite of the previous one.
+Here, `out_file.root` will contain histograms, where the last x-axis bin contains `maxval`. Higher bins are removed.<br>
+`histname` is a pattern (substring) of histogram names.
+
+## Rescale and merge cross section results
+
+Files: `modify_crosssec_run2.py`, `modify_crosssec_run3.py`<br>
+Usage: `python modify_crosssec_run2.py in_file.root histname out_histname out_file.root`
+
+The Run 2 script scales `histname` from `in_file.root` by 1./BR and merges the bins whose indices are provided in the script. The output is saved under the name `out_histname` in `out_file.root`.
+
+The Run 3 script only rescales the input histogram and saves the result in `out_histname` in `out_file.root`.
+
+The lines commented out provide more examples of rescaling.
+
+For Lc prompt cross section obtained during March 2025 approvals, only the uncommented lines in both files were used.
diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/add_pt_bins.py
@@ -0,0 +1,75 @@
+# pylint: disable=missing-function-docstring
+"""
+file:  add_pt_bins.py
+brief: Extend the x-range of the input histogram to 0-24 by adding dummy empty pT bins.
+usage: python3 add_pt_bins.py file.root my_histo file_out.root
+author: Maja Karwowska <[email protected]>, Warsaw University of Technology
+"""
+
+import argparse
+from array import array
+
+from ROOT import TH1F, TFile, gROOT  # pylint: disable=import-error,no-name-in-module
+
+
+def main():
+    """
+    Main function.
+    """
+    gROOT.SetBatch(True)
+
+    parser = argparse.ArgumentParser(description="Arguments to pass")
+    parser.add_argument("filename", help="input file with histogram")
+    parser.add_argument("histname", help="histogram name")
+    parser.add_argument("outname", help="output file for the new histogram")
+    args = parser.parse_args()
+
+    with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout:
+        hist = fin.Get(args.histname)
+        hist.SetDirectory(0)
+        first_bin = 1
+        # last_bin = hist.GetXaxis().FindBin(12.0)
+        last_bin = hist.GetNbinsX()
+        bins = [0.0]
+        # bins = []
+        empty_bins = len(bins)
+        for binn in range(first_bin, last_bin + 1):
+            bins.append(hist.GetBinLowEdge(binn))
+        # last_bins = [24.0, 25.0]
+        last_bins = [24.0]
+        bins += last_bins
+        print(f"Hist bins {bins}")
+        hist2 = TH1F(args.histname, "", len(bins) - 1, array('d', bins))
+        for binn in range(empty_bins, last_bin + 1):
+            hist2.SetBinContent(binn + 1, hist.GetBinContent(binn + 1 - empty_bins))
+            hist2.SetBinError(binn + 1, hist.GetBinError(binn + 1 - empty_bins))
+            print(f"Setting bin {binn + 1} low edge {hist2.GetBinLowEdge(binn + 1)} " \
+                  f"up edge {hist2.GetXaxis().GetBinUpEdge(binn + 1)} content to content " \
+                  f"from bin {binn + 1 - empty_bins}: {hist2.GetBinContent(binn + 1)}")
+        # Formula for merging 2 bins. For example, to compare with less granular Run 2 results.
+        # last_bin = hist2.GetNbinsX()
+        # width_combined = hist.GetBinWidth(hist.GetNbinsX() -1) + hist.GetBinWidth(hist.GetNbinsX())
+        # hist2.SetBinContent(last_bin,
+        #                     ((hist.GetBinContent(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) +\
+        #                      hist.GetBinContent(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX())) /\
+        #                     width_combined))
+        # hist2.SetBinError(last_bin,
+        #                   math.sqrt((hist.GetBinError(hist.GetNbinsX() - 1) * hist.GetBinWidth(hist.GetNbinsX() - 1) \
+        #                             / width_combined) **2  +\
+        #                             (hist.GetBinError(hist.GetNbinsX()) * hist.GetBinWidth(hist.GetNbinsX()) /\
+        #                             width_combined) ** 2))
+        # print(f"Setting bin {last_bin} low edge {hist2.GetBinLowEdge(last_bin)} " \
+        #       f"up edge {hist2.GetXaxis().GetBinUpEdge(last_bin)} content to content " \
+        #       f"from bins {hist.GetNbinsX()-1}, {hist.GetNbinsX()}: {hist2.GetBinContent(last_bin)}")
+        hist2.SetMarkerSize(hist.GetMarkerSize())
+        hist2.SetMarkerColor(hist.GetMarkerColor())
+        hist2.SetMarkerStyle(hist.GetMarkerStyle())
+        hist2.SetLineWidth(hist.GetLineWidth())
+        hist2.SetLineColor(hist.GetLineColor())
+        hist2.SetLineStyle(hist.GetLineStyle())
+        fout.cd()
+        hist2.Write()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run2.py
@@ -0,0 +1,77 @@
+# pylint: disable=missing-function-docstring
+"""
+file:  modify_crosssec_run2.py
+brief: Perform adjustments on the input Run 2 cross section histogram.
+usage: python3 modify_crosssec_run2.py file.root my_histo out_histo file_out.root
+author: Maja Karwowska <[email protected]>, Warsaw University of Technology
+"""
+
+import argparse
+import math
+from array import array
+
+from ROOT import TH1F, TFile, gROOT  # pylint: disable=import-error,no-name-in-module
+
+# Bin edges of the x-axis of the output histogram (passed to the TH1F constructor in main()).
+OUTPUT_BINS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 24]
+# The input histogram is scaled by 1/BR in main().
+# Presumably the branching ratio of the analysed decay channel -- TODO confirm the channel.
+BR = 0.0623
+
+def main():
+    """
+    Main function.
+    """
+    gROOT.SetBatch(True)
+
+    parser = argparse.ArgumentParser(description="Arguments to pass")
+    parser.add_argument("filename", help="input file with histogram")
+    parser.add_argument("histname", help="histogram name")
+    parser.add_argument("outhistname", help="outhistogram name")
+    parser.add_argument("outname", help="output file for the new histogram")
+    args = parser.parse_args()
+
+    with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout:
+        hist = fin.Get(args.histname)
+        hist.SetDirectory(0)
+        #hist.Scale(0.000000001)
+        hist.Scale(1./BR)
+        hist2 = TH1F(args.outhistname, "", len(OUTPUT_BINS) - 1, array('d', OUTPUT_BINS))
+        merge_bins = [20] # dummy large number so as not to merge
+        # merge bins = [7, 9] # indices of bins to merge
+        ind = 0
+        for binn in range(1, hist.GetNbinsX() + 1):
+            print(f"Old hist bin {binn} low edge {hist.GetBinLowEdge(binn)} "\
+                  f"up edge {hist.GetXaxis().GetBinUpEdge(binn)} "\
+                  f"content: {hist.GetBinContent(binn)} +/- {hist.GetBinError(binn)}")
+        for binn in range(1, hist2.GetNbinsX() + 1):
+            if binn < merge_bins[0]:
+                hist2.SetBinContent(binn, hist.GetBinContent(binn))
+                hist2.SetBinError(binn, hist.GetBinError(binn))
+            elif ind >= len(merge_bins) or binn > merge_bins[0] + len(merge_bins) / 2:
+                hist2.SetBinContent(binn, hist.GetBinContent(binn + ind))
+                hist2.SetBinError(binn, hist.GetBinError(binn + ind))
+            else:
+                bin1 = merge_bins[ind]
+                bin2 = merge_bins[ind] + 1
+                weight_sum = hist.GetBinWidth(bin1) + hist.GetBinWidth(bin2)
+                hist2.SetBinContent(binn,
+                        (hist.GetBinContent(bin1) * hist.GetBinWidth(bin1) +\
+                         hist.GetBinContent(bin2) * hist.GetBinWidth(bin2)) /\
+                        weight_sum)
+                hist2.SetBinError(binn,
+                                  math.sqrt(((hist.GetBinWidth(bin1) * hist.GetBinError(bin1)) / weight_sum) ** 2. +\
+                                            ((hist.GetBinWidth(bin2) * hist.GetBinError(bin2)) / weight_sum) ** 2.))
+                ind += 1
+            print(f"New bin {binn} low edge {hist2.GetBinLowEdge(binn)} "\
+                  f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} "\
+                  f"content: {hist2.GetBinContent(binn)} +/- {hist2.GetBinError(binn)} ind {ind}")
+        hist2.SetMarkerSize(hist.GetMarkerSize())
+        hist2.SetMarkerColor(hist.GetMarkerColor())
+        hist2.SetMarkerStyle(hist.GetMarkerStyle())
+        hist2.SetLineWidth(hist.GetLineWidth())
+        hist2.SetLineColor(hist.GetLineColor())
+        hist2.SetLineStyle(hist.GetLineStyle())
+        fout.cd()
+        hist2.Write()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/modify_crosssec_run3.py
@@ -0,0 +1,70 @@
+# pylint: disable=missing-function-docstring
+"""
+file:  modify_crosssec_run3.py
+brief: Perform adjustments on the input Run 3 cross section histogram.
+usage: python3 modify_crosssec_run3.py file.root my_histo out_histo file_out.root
+author: Maja Karwowska <[email protected]>, Warsaw University of Technology
+"""
+
+import argparse
+
+from ROOT import (  # pylint: disable=import-error,no-name-in-module
+    TFile,
+    gROOT,
+)
+
+# 2024 values for LHC22o
+# NOTE: MLHEP_EV_SEL and NORM are assigned again below, so these two values are not in effect.
+MLHEP_EV_SEL = 20430386.
+NORM = 47092223769.611162532
+# Only referenced in a commented-out scaling example in main().
+BR = 0.0623
+
+# 2025 values for LHC23_pass4_thin (they override the 2024 values above)
+MLHEP_EV_SEL = 258442910841. # ~2.6 x 10^11
+NORM = 3.0077675e+11
+
+# 2025 values for multiplicity analysis
+# Presumably NORM_<i> and EV_FACTOR_<i> belong to the multiplicity class i -- TODO confirm.
+# main() currently uses only EV_SEL_MULT, NORM_7 and EV_FACTOR_7.
+EV_SEL_MULT = 290860860000.
+NORM_MB = 249371059919
+NORM_2 = 37884927886
+EV_FACTOR_2 = 0.85
+NORM_3 = 50023302929
+EV_FACTOR_3 = 0.91
+NORM_4 = 49545723906
+EV_FACTOR_4 = 0.96
+NORM_5 = 49300695562
+EV_FACTOR_5 = 0.98
+NORM_6 = 22192632583
+EV_FACTOR_6 = 0.99
+NORM_7 = 2476292886
+EV_FACTOR_7 = 1.0
+
+def main():
+    """
+    Main function.
+    """
+    gROOT.SetBatch(True)
+
+    parser = argparse.ArgumentParser(description="Arguments to pass")
+    parser.add_argument("filename", help="input file with histogram")
+    parser.add_argument("histname", help="histogram name")
+    parser.add_argument("outhistname", help="outhistogram name")
+    parser.add_argument("outname", help="output file for the new histogram")
+    args = parser.parse_args()
+
+    with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout:
+        hist = fin.Get(args.histname)
+        hist2 = hist.Clone(args.outhistname)
+        hist2.SetDirectory(0)
+        #hist2.Scale(0.000001 * MLHEP_EV_SEL / NORM)
+        #hist2.Scale(0.000001)
+        #hist.Scale(1./59400000000) # luminosity scaling, lumi in pb
+        #hist.Scale(BR) # BR scaling back
+
+        hist2.Scale(EV_SEL_MULT / NORM_7)
+        hist2.Scale(EV_FACTOR_7)
+        fout.cd()
+        hist2.Write()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py b/machine_learning_hep/scripts-dhadrons/adjusting-run2-run3/remove_high_pt.py
@@ -0,0 +1,55 @@
+# pylint: disable=missing-function-docstring
+"""
+file:  remove_high_pt.py
+brief: Remove bins with pT > max_pt in all histograms matching my_histos_pattern in the input file.root.
+usage: python3 remove_high_pt.py file.root my_histos_pattern file_out.root max_pt
+author: Maja Karwowska <[email protected]>, Warsaw University of Technology
+"""
+
+import argparse
+from array import array
+
+from ROOT import TH1F, TFile, gROOT  # pylint: disable=import-error,no-name-in-module
+
+
+def main():
+    """
+    Main function.
+    """
+    gROOT.SetBatch(True)
+
+    parser = argparse.ArgumentParser(description="Arguments to pass")
+    parser.add_argument("filename", help="input file with histogram")
+    parser.add_argument("histname", help="histogram name pattern")
+    parser.add_argument("outname", help="output file for the new histogram")
+    parser.add_argument("maxval", type=float, help="maxval in histogram")
+    args = parser.parse_args()
+
+    with TFile(args.filename) as fin, TFile(args.outname, "recreate") as fout:
+        histnames = [key.GetName() for key in fin.GetListOfKeys() if args.histname in key.GetName()]
+        for histname in histnames:
+            hist = fin.Get(histname)
+            hist.SetDirectory(0)
+            last_bin = hist.GetXaxis().FindBin(args.maxval)
+            bins = []
+            for binn in range(1, last_bin + 1):
+                bins.append(hist.GetBinLowEdge(binn))
+            hist2 = TH1F(histname, "", len(bins) - 1, array('d', bins))
+            for binn in range(1, last_bin + 1):
+                hist2.SetBinContent(binn, hist.GetBinContent(binn))
+                hist2.SetBinError(binn, hist.GetBinError(binn))
+                print(f"Setting bin {binn} low edge {hist2.GetBinLowEdge(binn)} " \
+                      f"up edge {hist2.GetXaxis().GetBinUpEdge(binn)} content to content " \
+                      f"from bin {binn}: {hist2.GetBinContent(binn)}")
+            hist2.SetMarkerSize(hist.GetMarkerSize())
+            hist2.SetMarkerColor(hist.GetMarkerColor())
+            hist2.SetMarkerStyle(hist.GetMarkerStyle())
+            hist2.SetLineWidth(hist.GetLineWidth())
+            hist2.SetLineColor(hist.GetLineColor())
+            hist2.SetLineStyle(hist.GetLineStyle())
+            fout.cd()
+            hist2.Write()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/machine_learning_hep/scripts-dhadrons/debugging/README.md b/machine_learning_hep/scripts-dhadrons/debugging/README.md
@@ -0,0 +1,26 @@
+# Verify different stages of MLHEP processing
+
+## Check MLHEP output data files
+
+File: `check_parquet.py`<br>
+Usage: `python check_parquet.py in_file.parquet`
+
+The Python script contains some examples of how to read from a parquet file, print some useful information, and plot histograms.
+
+It can be used to check MLHEP skimming, training, and application outputs by testing individual parquet files.
+
+## Compare prompt fractions calculated with different inputs or methods
+
+Files: `plot_prompt_fraction_vs_crosssec_configs.py`, `config_fraction_vs_crosssec_configs.json`<br>
+Usage: `python plot_prompt_fraction_vs_crosssec_configs.py config_fraction_vs_crosssec_configs.json`
+
+Adjust the JSON config. You can provide as many histogram files in the `hists` dictionary as you want.
+By adjusting `histoname`, you can also plot the non-prompt fraction.
+
+## Plot prompt fraction vs different BDT cuts
+
+Files: `plot_prompt_fraction_vs_bdt_cuts.py`, `config_fraction_vs_bdt_cuts.json`<br>
+Usage: `python plot_prompt_fraction_vs_bdt_cuts.py config_fraction_vs_bdt_cuts.json`
+
+Adjust the JSON config. Here, you provide a glob pattern to all files of interest.
+By adjusting `histoname`, you can also plot the non-prompt fraction.