Skip to content

Clean up common code and jet databases #979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: run3
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 10 additions & 122 deletions machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,17 @@ D0Jet_pp:
var_binning: fPt
dofullevtmerge: false

# obsolete, to be removed
# var_cand: fCandidateSelFlag
# # var_swap: fIsCandidateSwapped
bitmap_sel:
var_name: fFlagMcMatchRec
var_name_gen: fFlagMcMatchGen
var_name_origgen: fOriginMcGen
var_name_origrec: fOriginMcRec
var_isstd: isstd
# var_name_gen: fFlagMcMatchGen # unused
# var_name_origgen: fOriginMcGen # unused
# var_name_origrec: fOriginMcRec # unused
var_ismcsignal: ismcsignal
var_ismcprompt: ismcprompt
var_ismcfd: ismcfd
var_ismcbkg: ismcbkg
var_ismcrefl: ismcrefl
isstd: [[1], []]
ismcsignal: [[0], []]
ismcprompt: [[0], [1]]
ismcfd: [[1], [0]]
ismcbkg: [[], [1]]
ismcrefl: [[1], [1]] # probably missing from tree creator

#region dfs
dfs:
Expand Down Expand Up @@ -360,13 +351,7 @@ D0Jet_pp:

#region analysis
analysis:
anahptspectrum: "D0Kpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp # used in analysis/analyzerdhadrons_mult.py
fd_method: "Nb" #fc, Nb
cctype: "pp"
inputfonllpred: data/fonll/D0DplusDstarPredictions_13TeV_y05_all_300416_BDShapeCorrected.root # used in machine_learning_hep/hf_pt_spectrum.py
dir_general_plots: /data2/jklein/data/analysis_plots

jet_obs: &jet_default
jet_obs:
sel_an_binmin: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24] # hadron pt bins (sel_an_binmin bins)
sel_an_binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24, 48] # hadron pt bins (sel_an_binmin bins) # FIXME: move the last edge in sel_an_binmin
bins_ptjet: [5, 7, 15, 30, 50, 70] # systematics, TODO: split rec and gen binning
Expand All @@ -375,9 +360,9 @@ D0Jet_pp:
counter_read_data: fReadCountsWithTVXAndZVertexAndSel8
counter_read_mc: fReadCountsWithTVXAndZVertexAndSelMC
counter_tvx: fReadCountsWithTVX
xsection_inel: 59.4 # (mb) cross-section of minimum-bias events # used # systematics
xsection_inel: 59.4 # (mb) cross-section of minimum-bias events # systematics
lumi_scale_mc: 408 # charm enhancement factor in MC to scale the MC luminosity
branching_ratio: 3.947e-2 # used
branching_ratio: 3.947e-2

observables:
zg:
Expand Down Expand Up @@ -710,8 +695,8 @@ D0Jet_pp:
index_match: fIndexArrayD0CMCPJETOS_hf
correction_method: run3

unfolding_iterations: 8 # used, maximum iteration
unfolding_iterations_sel: 5 # used, selected iteration # systematics
unfolding_iterations: 8 # maximum iteration
unfolding_iterations_sel: 5 # selected iteration # systematics
unfolding_prior_flatness: 0. # ranges from 0. (no flatness) to 1. (flat)

closure:
Expand All @@ -725,123 +710,26 @@ D0Jet_pp:
fd_root: "/data2/vkucera/powheg/trees_powheg_fd_central.root" # systematics
fd_parquet: "/data2/jklein/powheg/trees_powheg_fd_central.parquet" # systematics

# obsolete?
proc_type: Jets # used
useperiod: [1] #list of periods # used
# usejetptbinned_deff: false
# doeff_resp: true #efficiency correction for the response matrix
# unmatched_gen: true
proc_type: Jets
useperiod: [1] #list of periods
latexnamehadron: "D^{0}"
# latexnamedecay: "K^{#minus} #pi^{#plus}"
var_binning2: pt_jet
# var_binning2_gen: pt_gen_jet
latexbin2var: "#it{p}_{T}^{jet ch}"
# sel_binmin2_reco: [5, 7, 15, 30] # rec jet pt bins (sel_binmin2_reco bins)
# sel_binmax2_reco: [7, 15, 30, 50] # rec jet pt bins (sel_binmin2_reco bins)
# sel_binmin2_gen: [5, 7, 15, 30] # rec jet pt bins (sel_binmin2_reco bins)
# sel_binmax2_gen: [7, 15, 30, 50] # rec jet pt bins (sel_binmin2_reco bins)
# var_binningshape: zg_jet
# var_binningshape_gen: zg_gen_jet
# var_shape_latex: "shape"
# sel_binminshape_reco: [-0.1,0.1,0.2,0.3,0.4]
# sel_binmaxshape_reco: [0.1,0.2,0.3,0.4,0.5]
# sel_binminshape_gen: [-0.1,0.1,0.2,0.3,0.4]
# sel_binmaxshape_gen: [0.1,0.2,0.3,0.4,0.5]
# sel_closure_frac: 0.2
# triggerbit: INT7
#jetsel_gen: "abs(y_cand) < 0.8 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5"
#jetsel_sim: "abs(y_cand) < 0.8 and abs(eta_jet) < 0.5" # jet selection in simulations
#jetsel_reco: "abs(y_cand) < 0.8 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5"
#jetsel_gen_matched_reco: "abs(eta_gen_jet) < 5.0"
# jetsel_gen: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5"
# jetsel_sim: "abs(y_cand) < 0.5 and abs(eta_jet) < 0.5" # jet selection in simulations
# jetsel_reco: "abs(y_cand) < 0.5 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5"
# jetsel_gen_matched_reco: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_gen_jet) < 0.5"
evtsel: null # fIsEventReject==0
triggersel:
data: "trigger_hasbit_INT7==1"
mc: null
data: &data_out_default
runselection: [null] #FIXME # used but useless
results: ["/home/${USER}/mlhep/d0jet/jet_obs/default/default/data/results"] #list of periods
resultsallp: "/home/${USER}/mlhep/d0jet/jet_obs/default/default/data/results_all"
mc: &mc_out_default
runselection: [null] #FIXME # used but useless
results: ["/home/${USER}/mlhep/d0jet/jet_obs/default/default/mc/results"] #list of periods
resultsallp: "/home/${USER}/mlhep/d0jet/jet_obs/default/default/mc/results_all"
data_proc: # alternative processor output used as the analyzer input
<<: *data_out_default
mc_proc: # alternative processor output used as the analyzer input
<<: *mc_out_default

# simple fitter START # used in cplusutilities/mass_fitter.C
# sgnfunc: [0,0,0,0,0,0,0,0,0,0,0,0] # kGaus=0, k2Gaus=1, k2GausSigmaRatioPar=2 (sel_an_binmin bins)
# bkgfunc: [0,0,0,0,0,0,0,0,0,0,0,0] # kExpo=0, kLin=1, kPol2=2, kNoBk=3, kPow=4, kPowEx=5 (sel_an_binmin bins)
# masspeak: 1.864
# massmin: [1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66,1.66] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2]
# massmax: [2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06,2.06] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2]
# rebin: [6,6,6,6,6,6,6,6,6,6,6,6] # sel_an_binmin bins
# fix_mean: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins
# masspeaksec: 1.864

# obsolete (uses Ali... fitter)
# If SetArraySigma true: sigma_initial is taken from sigmaarray; false: sigma_initial is taken from MC
# If SetFixGaussianSigma true: sigma fixed to sigma_initial
# SetFixGaussianSigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins
# SetFixGaussianSigma: [true, true, true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins
# SetArraySigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins
# sigmaarray: [0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01] # initial sigma (sel_an_binmin bins)

# fix_sigmasec: [true, true, true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins
# sigmaarraysec: [0.007497,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01] # sel_an_binmin bins
# use_reflections: true
# simple fitter END

# signal_sigma: 2.0
# sigma_scale: 0.9545
# sideband_sigma_1_left: 4
# sideband_sigma_2_left: 9
# sideband_sigma_1_right: 4
# sideband_sigma_2_right: 9
# sidebandleftonly: false

# niterunfolding: 15
# niterunfoldingchosen: 4

# doprior: false
# domodeldep: false
# path_modeldep: /home/nzardosh/PYTHIA_Sim/PYTHIA8_Simulations/Plots/D0_Substructure_Simulations_Output.root

# replace with fd_root...
# powheg_path_nonprompt: /data/POWHEG/trees_powheg_fd_central.root
# powheg_path_prompt: /data/POWHEG/trees_powheg_pr_central.root
# powheg_prompt_variations_path: /data/POWHEG/trees_powheg_pr_
# powheg_prompt_variations: ["F1_R05","F05_R1","F2_R1","F1_R2","F2_R2","F05_R05","Mhigh","Mlow"]

# pythia8_prompt_variations_path: /data/PYTHIA8/trees_pythia8_pr_
# pythia8_prompt_variations: ["default", "charm_lo"] #["default","colour0soft"]
# pythia8_prompt_variations_legend: ["PYTHIA 8 (Monash)", "PYTHIA 8 charm LO"] # ["PYTHIA 8 (Monash)","PYTHIA 8 SoftQCD, mode 0"]

variations_db: database_variations_D0Jet_pp_jet_obs.yml

# Additional cuts applied before mass histogram is filled
use_cuts: True # systematics
cuts: ["mlBkgScore < 0.02", "mlBkgScore < 0.02", "mlBkgScore < 0.02", "mlBkgScore < 0.05", "mlBkgScore < 0.06", "mlBkgScore < 0.08", "mlBkgScore < 0.08", "mlBkgScore < 0.10", "mlBkgScore < 0.10", "mlBkgScore < 0.20", "mlBkgScore < 0.25", "mlBkgScore < 0.30"] # (sel_an_binmin bins) systematics FIXME: Update for new model.

systematics: # used in machine_learning_hep/analysis/systematics.py
probvariation:
useperiod: [0, 0, 1] #period from where to define prob cuts
ncutvar: 10 #number of looser and tighter variations
maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var
cutvarminrange: [0.80, 0.80, 0.6, 0.3, 0.3] #Min starting point for scan
cutvarmaxrange: [0.98, 0.95, 0.95, 0.95, 0.95] #Max starting point for scan
fixedmean: True #Fix mean cutvar histo to central fit
fixedsigma: True #Fix sigma cutvar histo to central fit
mcptshape:
#FONLL / generated LHC19h4c1
weights: [1.000000]
#From SetPtWeightsFromFONLL13overLHC17c3a12 in AliPhysics
#weights: [1.429770]
weights_min_pt: 0
weights_max_pt: 40
weights_bins: 400
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,18 @@ LcJet_pp:
sel_skim_binmax: [2, 3, 4, 5, 6, 7, 8, 10, 12, 24] # skimming pt bins (sel_skim_binmin bins)
var_binning: fPt
dofullevtmerge: false
var_cand: fCandidateSelFlag
# var_swap: fIsCandidateSwapped

bitmap_sel:
var_name: fFlagMcMatchRec
var_name_gen: fFlagMcMatchGen
var_name_origgen: fOriginMcGen
var_name_origrec: fOriginMcRec
var_isstd: isstd
# var_name_gen: fFlagMcMatchGen # unused
# var_name_origgen: fOriginMcGen # unused
# var_name_origrec: fOriginMcRec # unused
var_ismcsignal: ismcsignal
var_ismcprompt: ismcprompt
var_ismcfd: ismcfd
var_ismcbkg: ismcbkg
var_ismcrefl: ismcrefl
isstd: [[1], []]
ismcsignal: [[1], []]
ismcprompt: [[0], []]
ismcfd: [[1], []]
ismcbkg: [[], [1]]
ismcrefl: [[1], [1]] # probably missing from tree creator

#region dfs
dfs:
Expand Down Expand Up @@ -337,14 +330,7 @@ LcJet_pp:

#region analysis
analysis:
anahptspectrum: "LctopKpi" #D0Kpi, DplusKpipi, DstarD0pi, DsKKpi, LctopKpi, LcK0Sp
fd_method: "Nb" #fc, Nb
cctype: "pp"
sigmamb: 57.8e-3 #NB: multiplied by 1e12 before giving to HFPtSpectrum!
inputfonllpred: data/fonll/DmesonLcPredictions_13TeV_y05_FFptDepLHCb_BRpythia8_PDG2020.root
dir_general_plots: "/data2/${USER}/data/analysis_plots"

jet_obs: &jet_default
jet_obs:
sel_an_binmin: [2, 3, 4, 5, 6, 7, 8, 10, 12, 16] # hadron pt bins (sel_an_binmin bins)
sel_an_binmax: [3, 4, 5, 6, 7, 8, 10, 12, 16, 24] # hadron pt bins (sel_an_binmin bins)
bins_ptjet: [2, 5, 7, 10, 15, 30] # systematics, TODO: split rec and gen binning
Expand All @@ -353,9 +339,9 @@ LcJet_pp:
counter_read_data: fReadCountsWithTVXAndZVertexAndSel8
counter_read_mc: fReadCountsWithTVXAndZVertexAndSelMC
counter_tvx: fReadCountsWithTVX
xsection_inel: 59.4 # (mb) cross-section of minimum-bias events # used # systematics
xsection_inel: 59.4 # (mb) cross-section of minimum-bias events # systematics
lumi_scale_mc: 408 # charm enhancement factor in MC to scale the MC luminosity
branching_ratio: 6.24e-2 # used
branching_ratio: 6.24e-2

observables:
zg:
Expand Down Expand Up @@ -474,8 +460,8 @@ LcJet_pp:
extra_cols: ["mlBkgScore"]
correction_method: run3

unfolding_iterations: 8 # used, maximum iteration
unfolding_iterations_sel: 5 # used, selected iteration # systematics
unfolding_iterations: 8 # maximum iteration
unfolding_iterations_sel: 5 # selected iteration # systematics
unfolding_prior_flatness: 0. # ranges from 0. (no flatness) to 1. (flat)

fd_folding_method: 3d
Expand All @@ -484,119 +470,24 @@ LcJet_pp:

proc_type: Jets
useperiod: [1] #list of periods
usejetptbinned_deff: false
doeff_resp: true #efficiency correction for the response matrix
unmatched_gen: true
latexnamehadron: "#Lambda_{c}^{#plus}"
latexnamedecay: "pK#pi"
var_binning2: pt_jet
var_binning2_gen: pt_gen_jet
latexbin2var: "#it{p}_{T}^{jet ch}"
sel_binmin2_reco: [7.0, 15.0, 30.0] # rec jet pt bins (sel_binmin2_reco bins)
sel_binmax2_reco: [15.0, 30.0, 50.0] # rec jet pt bins (sel_binmin2_reco bins)
sel_binmin2_gen: [7.0, 15.0, 30.0] # gen jet pt bins (sel_binmin2_gen bins)
sel_binmax2_gen: [15.0, 30.0, 50.0] # gen jet pt bins (sel_binmin2_gen bins)
var_binningshape: zg_jet
var_binningshape_gen: zg_gen_jet
var_shape_latex: "#it{z}_{g}"
sel_binminshape_reco: [-0.1, 0.1, 0.2, 0.3, 0.4]
sel_binmaxshape_reco: [0.1, 0.2, 0.3, 0.4, 0.5]
sel_binminshape_gen: [-0.1, 0.1, 0.2, 0.3, 0.4]
sel_binmaxshape_gen: [0.1, 0.2, 0.3, 0.4, 0.5]
sel_closure_frac: 0.2
triggerbit: INT7
#jetsel_gen: "abs(y_cand) < 0.8 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5"
#jetsel_sim: "abs(y_cand) < 0.8 and abs(eta_jet) < 0.5" # jet selection in simulations
#jetsel_reco: "abs(y_cand) < 0.8 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5"
#jetsel_gen_matched_reco: "abs(eta_gen_jet) < 5.0"
jetsel_gen: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_jet) < 0.5"
jetsel_sim: "abs(y_cand) < 0.5 and abs(eta_jet) < 0.5" # jet selection in simulations
jetsel_reco: "abs(y_cand) < 0.5 and abs(z_vtx_reco) < 10 and abs(eta_jet) < 0.5"
jetsel_gen_matched_reco: "abs(y_cand) < 0.5 and abs(z_vtx_gen) < 10 and abs(eta_gen_jet) < 0.5"
evtsel: fIsEventReject==0
triggersel:
data: "trigger_hasbit_INT7==1"
mc: null
data: &data_out_default
runselection: [null] #FIXME
results: ["/home/${USER}/mlhep/lcjet/jet_obs/default/default/data/results"] #list of periods
resultsallp: "/home/${USER}/mlhep/lcjet/jet_obs/default/default/data/results_all"
mc: &mc_out_default
runselection: [null, null] #FIXME
results: ["/home/${USER}/mlhep/lcjet/jet_obs/default/default/mc/results"] #list of periods
resultsallp: "/home/${USER}/mlhep/lcjet/jet_obs/default/default/mc/results_all"
data_proc: # alternative processor output used as the analyzer input
<<: *data_out_default
mc_proc: # alternative processor output used as the analyzer input
<<: *mc_out_default

# simple fitter START
sgnfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kGaus=0, k2Gaus=1, k2GausSigmaRatioPar=2 (sel_an_binmin bins)
bkgfunc: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # kExpo=0, kLin=1, kPol2=2, kNoBk=3, kPow=4, kPowEx=5 (sel_an_binmin bins)
masspeak: 2.286
massmin: [1.66, 1.66, 1.66, 1.66, 1.66, 1.66, 1.66, 1.66, 1.66, 1.66, 1.66, 1.66] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2]
massmax: [2.06, 2.06, 2.06, 2.06, 2.06, 2.06, 2.06, 2.06, 2.06, 2.06, 2.06, 2.06] # sel_an_binmin bins, fit region of the invariant mass distribution [GeV/c^2]
rebin: [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6] # sel_an_binmin bins
fix_mean: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins
masspeaksec: 2.286

# If SetArraySigma true: sigma_initial is taken from sigmaarray; false: sigma_initial is taken from MC
# If SetFixGaussianSigma true: sigma fixed to sigma_initial
# SetFixGaussianSigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins
SetFixGaussianSigma: [true, true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins
SetArraySigma: [false, false, false, false, false, false, false, false, false, false, false, false] # sel_an_binmin bins
sigmaarray: [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01] # initial sigma (sel_an_binmin bins)

fix_sigmasec: [true, true, true, true, true, true, true, true, true, true] # sel_an_binmin bins
sigmaarraysec: [0.007497, 0.007497, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01] # sel_an_binmin bins
use_reflections: true
# simple fitter END

signal_sigma: 2.0
sigma_scale: 0.9545
sideband_sigma_1_left: 4
sideband_sigma_2_left: 9
sideband_sigma_1_right: 4
sideband_sigma_2_right: 9
sidebandleftonly: false

niterunfolding: 15
niterunfoldingchosen: 4

doprior: false
domodeldep: false
path_modeldep: /home/nzardosh/PYTHIA_Sim/PYTHIA8_Simulations/Plots/D0_Substructure_Simulations_Output.root

powheg_path_nonprompt: /data/POWHEG/trees_powheg_fd_central.root

powheg_path_prompt: /data/POWHEG/trees_powheg_pr_central.root
powheg_prompt_variations_path: /data/POWHEG/trees_powheg_pr_
powheg_prompt_variations: ["F1_R05", "F05_R1", "F2_R1", "F1_R2", "F2_R2", "F05_R05", "Mhigh", "Mlow"]

pythia8_prompt_variations_path: /data/PYTHIA8/trees_pythia8_pr_
pythia8_prompt_variations: ["default", "charm_lo"] #["default","colour0soft"]
pythia8_prompt_variations_legend: ["PYTHIA 8 (Monash)", "PYTHIA 8 charm LO"] # ["PYTHIA 8 (Monash)","PYTHIA 8 SoftQCD, mode 0"]

variations_db: database_variations_LcJet_pp_jet_obs.yml

# Additional cuts applied before mass histogram is filled
use_cuts: True
cuts: ["mlBkgScore < 0.03", "mlBkgScore < 0.04", "mlBkgScore < 0.07", "mlBkgScore < 0.09", "mlBkgScore < 0.11", "mlBkgScore < 0.15", "mlBkgScore < 0.18", "mlBkgScore < 0.25", "mlBkgScore < 0.35", "mlBkgScore < 0.35"] # (sel_an_binmin bins) systematics FIXME: Update for new model.

systematics: # used in machine_learning_hep/analysis/systematics.py
probvariation:
useperiod: [0, 0, 1] #period from where to define prob cuts
ncutvar: 10 #number of looser and tighter variations
maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var
cutvarminrange: [0.80, 0.80, 0.6, 0.3, 0.3] #Min starting point for scan
cutvarmaxrange: [0.98, 0.95, 0.95, 0.95, 0.95] #Max starting point for scan
fixedmean: True #Fix mean cutvar histo to central fit
fixedsigma: True #Fix sigma cutvar histo to central fit
mcptshape:
#FONLL / generated LHC19h4c1
weights: [1.000000]
#From SetPtWeightsFromFONLL13overLHC17c3a12 in AliPhysics
#weights: [1.429770]
weights_min_pt: 0
weights_max_pt: 40
weights_bins: 400
Loading
Loading